<!DOCTYPE html>
<html lang="zh-CN">

<head>
    <meta charset="UTF-8">
    <!-- Responsive viewport: render at device width with no initial zoom. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>arXiv 每日论文精选</title>
    <!-- Tailwind CSS loaded as a script from its CDN; the utility classes used throughout the markup rely on it. -->
    <script src="https://cdn.tailwindcss.com"></script>
    <!-- Font Awesome 4.7.0 stylesheet, used by the `fa fa-*` icon elements in this page. -->
    <link href="https://cdn.jsdelivr.net/npm/font-awesome@4.7.0/css/font-awesome.min.css" rel="stylesheet">
    <!-- Site stylesheet. NOTE(review): the `v` query parameter looks like a cache-busting timestamp; confirm with the generator. -->
    <link rel="stylesheet" href="static/styles.css?v=1761467353">
    <!-- NOTE(review): loaded after the Tailwind CDN script; presumably defines the custom theme tokens
         (e.g. `primary`, `accent`, `secondary`, `dark`, `light`) that appear in class names below. Verify before reordering. -->
    <script src="static/tailwind.config.js"></script>

    <style>
        /*
         * Tiered collapse styles.
         *   Level 1: the element stays visible but its .paper-details body is hidden.
         *   Level 2: the whole element is hidden.
         */
        .collapsed-level-1 .paper-details {
            display: none;
        }

        .collapsed-level-2 {
            display: none !important;
        }

        /* Expand/collapse icon */
        .expand-icon {
            display: inline-block;
            width: 20px;
            text-align: center;
            margin-right: 5px;
        }

        /* Expand/collapse toggle button */
        .expand-toggle {
            cursor: pointer;
            padding: 8px 12px;
            background-color: #f3f4f6;
            border: 1px solid #e5e7eb;
            border-radius: 6px;
            margin-bottom: 16px;
            text-align: center;
            font-weight: 500;
            color: #4b5563;
            transition: all 0.2s ease;
        }

        .expand-toggle:hover {
            background-color: #e5e7eb;
        }

        /* Divider line between groups of papers */
        .papers-divider {
            height: 1px;
            background-color: #e5e7eb;
            margin: 20px 0;
            position: relative;
        }

        /* Label centred on top of the divider line */
        .papers-divider-label {
            position: absolute;
            left: 50%;
            top: 50%;
            transform: translate(-50%, -50%);
            background-color: white;
            padding: 0 12px;
            color: #9ca3af;
            font-size: 14px;
            cursor: pointer;
        }

        .papers-divider-label:hover {
            color: #4b5563;
        }

        /* Expanded state: an ancestor with .expanded-all reveals level-1 details. */
        .expanded-all .collapsed-level-1 .paper-details {
            display: block;
        }

        /*
         * Expanded state for level-2 elements.
         * These rules must carry !important: .collapsed-level-2 hides with
         * "display: none !important", and a normal declaration can never
         * override an important one regardless of selector specificity.
         */
        .expanded-all .collapsed-level-2,
        .expanded-level-2 .collapsed-level-2 {
            display: block !important;
        }
    </style>
    </head>

<body class="bg-gray-50 font-sans text-dark">
    <!-- Sticky page header: site title, current date with a date-picker popup, and summary statistics. -->
    <header class="bg-white shadow-sm sticky top-0 z-10 border-b border-gray-200">
        <div class="container mx-auto px-4 py-4">
            <div class="flex flex-col md:flex-row justify-between items-start md:items-center mb-3">
                <div class="flex items-center">
                    <!-- Icon-font glyphs next to text are decorative, so they are hidden from assistive technology. -->
                    <i class="fa fa-book text-primary text-xl mr-2" aria-hidden="true"></i>
                    <h1 class="text-lg md:text-xl font-bold text-gray-800">arXiv 每日论文精选</h1>
                </div>
                <div class="flex items-center mt-2 md:mt-0">
                    <span id="current-date" class="text-gray-600 text-sm">
                        <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>2025-10-16
                    </span>
                    <div class="ml-3 relative" id="date-picker-container">
                        <!-- Explicit type="button" so the control never acts as a submit button. -->
                        <button id="date-picker-toggle" type="button" class="bg-light border border-gray-300 text-gray-700 py-1 px-3 pr-6 rounded text-sm leading-tight focus:outline-none focus:bg-white inline-flex items-center">
                            <i class="fa fa-calendar mr-2" aria-hidden="true"></i>
                            <span id="selected-date-text">2025-10-16</span>
                            <i class="fa fa-chevron-down ml-2 text-xs" aria-hidden="true"></i>
                        </button>
                        <!-- Date-picker popup; starts with the `hidden` utility class. -->
                        <div id="date-picker" class="hidden absolute right-0 mt-1 bg-white border border-gray-300 rounded shadow-lg p-2 z-20 w-56">
                            <div class="flex justify-between items-center mb-2">
                                <!-- Icon-only buttons need an accessible name, supplied through aria-label. -->
                                <button id="prev-month" type="button" class="text-gray-500 hover:text-gray-700" aria-label="上个月"><i class="fa fa-chevron-left" aria-hidden="true"></i></button>
                                <h4 id="current-month">2025-10-16</h4>
                                <button id="next-month" type="button" class="text-gray-500 hover:text-gray-700" aria-label="下个月"><i class="fa fa-chevron-right" aria-hidden="true"></i></button>
                            </div>
                            <!-- Weekday column headings, Sunday first. -->
                            <div class="grid grid-cols-7 gap-1 text-center text-xs mb-1">
                                <div class="text-gray-500">日</div>
                                <div class="text-gray-500">一</div>
                                <div class="text-gray-500">二</div>
                                <div class="text-gray-500">三</div>
                                <div class="text-gray-500">四</div>
                                <div class="text-gray-500">五</div>
                                <div class="text-gray-500">六</div>
                            </div>
                            <div id="calendar-grid" class="grid grid-cols-7 gap-1 text-center text-sm">
                                <!-- Calendar cells are generated dynamically by JavaScript -->
                            </div>
                        </div>
                    </div>
                </div>
            </div>

            <!-- Summary statistics: total papers, selected papers, average score -->
            <div class="flex flex-wrap gap-4 text-sm">
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-file-text-o" aria-hidden="true"></i> 总论文数:</span>
                    <span id="total-papers" class="font-semibold text-primary">166</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-star" aria-hidden="true"></i> 精选论文数:</span>
                    <span id="selected-papers" class="font-semibold text-accent">20</span>
                </div>
                <div class="flex items-center">
                    <span class="text-gray-500 mr-1"><i class="fa fa-line-chart" aria-hidden="true"></i> 平均评分:</span>
                    <span id="avg-score" class="font-semibold text-secondary">2.7</span>
                </div>
            </div>
        </div>
    </header>

    <!-- 主内容区 -->
    <main class="container mx-auto px-4 py-5">
        <!-- Filter bar: displayed-paper count on the left, "all papers" / "selected only" buttons on the right.
             NOTE(review): click handlers are presumably bound by a script outside this section. -->
        <div class="mb-4 flex flex-col sm:flex-row justify-between items-start sm:items-center">
            <div class="text-gray-700 text-sm mb-2 sm:mb-0">
                <span id="display-count" class="font-medium">显示 166 篇论文 (共 166 篇)</span>
            </div>
            <div class="flex space-x-2">
                <!-- Explicit type="button": a bare button defaults to type="submit". -->
                <button id="show-all" type="button"
                    class="px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors">
                    全部论文
                </button>
                <button id="show-selected" type="button"
                    class="px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors">
                    仅显示精选
                </button>
            </div>
        </div>

        <!-- 论文列表 -->
        <div id="papers-container" class="grid grid-cols-1 gap-4">
            
<!-- Paper card for arXiv:2510.13738v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13738v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>HyMiRec：一种用于基于大语言模型的序列推荐的混合多兴趣学习框架
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            HyMiRec: A Hybrid Multi-interest Learning Framework for LLM-based Sequential Recommendation
        </div>

        <!-- Authors (list is truncated with an ellipsis in the data) -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingyi Zhou, Cheng Chen, Kai Zuo, Manjie Xu, Zhendong Fu, Yibo Chen, Xu Tang, Ya...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM在序列推荐中因序列截断导致长程偏好信号丢失和单一预测忽略用户兴趣多样性的问题，核心方法是提出混合多兴趣学习框架，通过轻量推荐器提取粗粒度兴趣嵌入和LLM推荐器捕获细粒度兴趣嵌入，并设计解耦多兴趣学习模块自适应学习多个兴趣信号。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM在序列推荐中的两大核心限制提出解决方案，与LLM应用和Transformer架构优化高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 16:45:59
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13738v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13738v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large language models (LLMs) have recently demonstrated strong potential for sequential recommendation. However, current LLM-based approaches face critical limitations in modeling users' long-term and diverse interests. First, due to inference latency and feature fetching bandwidth constraints, existing methods typically truncate user behavior sequences to include only the most recent interactions, resulting in the loss of valuable long-range preference signals. Second, most current methods rely on next-item prediction with a single predicted embedding, overlooking the multifaceted nature of user interests and limiting recommendation diversity. To address these challenges, we propose HyMiRec, a hybrid multi-interest sequential recommendation framework, which leverages a lightweight recommender to extracts coarse interest embeddings from long user sequences and an LLM-based recommender to captures refined interest embeddings. To alleviate the overhead of fetching features, we introduce a residual codebook based on cosine similarity, enabling efficient compression and reuse of user history embeddings. To model the diverse preferences of users, we design a disentangled multi-interest learning module, which leverages multiple interest queries to learn disentangles multiple interest signals adaptively, allowing the model to capture different facets of user intent. Extensive experiments are conducted on both benchmark datasets and a collected industrial dataset, demonstrating our effectiveness over existing state-of-the-art methods. Furthermore, online A/B testing shows that HyMiRec brings consistent improvements in real-world recommendation systems.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13371v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13371v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>MADREC：一种面向可解释和自适应推荐的多方面驱动大语言模型智能体
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            MADREC: A Multi-Aspect Driven LLM Agent for Explainable and Adaptive Recommendation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiin Park, Misuk Kim
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何构建能捕捉用户偏好复杂性的LLM推荐系统，核心方法是提出多维度驱动的LLM代理，通过无监督提取评论中的多维度信息构建用户和物品画像，并采用重排序和自反馈机制实现自适应推荐。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接应用LLM构建多维度用户画像并实现自适应推荐，完美契合直接LLM应用和推荐系统核心进展的研究重点。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 10:03:29
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13371v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13371v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recent attempts to integrate large language models (LLMs) into recommender systems have gained momentum, but most remain limited to simple text generation or static prompt-based inference, failing to capture the complexity of user preferences and real-world interactions. This study proposes the Multi-Aspect Driven LLM Agent MADRec, an autonomous LLM-based recommender that constructs user and item profiles by unsupervised extraction of multi-aspect information from reviews and performs direct recommendation, sequential recommendation, and explanation generation. MADRec generates structured profiles via aspect-category-based summarization and applies Re-Ranking to construct high-density inputs. When the ground-truth item is missing from the output, the Self-Feedback mechanism dynamically adjusts the inference criteria. Experiments across multiple domains show that MADRec outperforms traditional and LLM-based baselines in both precision and explainability, with human evaluation further confirming the persuasiveness of the generated explanations.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13359v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13359v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>基于视觉语言模型改进电商平台的视觉推荐
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Improving Visual Recommendation on E-commerce Platforms Using Vision-Language Models
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuki Yada, Sho Akiyama, Ryo Watanabe, Yuta Ueno, Yusuke Shido, Andre Rusli
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究电商平台中基于视觉相似性的产品推荐问题，其核心方法是利用视觉语言模型（VLM）对产品图像和标题进行联合建模，通过微调SigLIP模型生成用于推荐系统的商品嵌入表示。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接应用视觉语言模型于电商推荐系统，完美契合VLM类比异构数据和直接LLM应用两个重点领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 09:46:27
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13359v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13359v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                On large-scale e-commerce platforms with tens of millions of active monthly users, recommending visually similar products is essential for enabling users to efficiently discover items that align with their preferences. This study presents the application of a vision-language model (VLM) -- which has demonstrated strong performance in image recognition and image-text retrieval tasks -- to product recommendations on Mercari, a major consumer-to-consumer marketplace used by more than 20 million monthly users in Japan. Specifically, we fine-tuned SigLIP, a VLM employing a sigmoid-based contrastive loss, using one million product image-title pairs from Mercari collected over a three-month period, and developed an image encoder for generating item embeddings used in the recommendation system. Our evaluation comprised an offline analysis of historical interaction logs and an online A/B test in a production environment. In offline analysis, the model achieved a 9.1% improvement in nDCG@5 compared with the baseline. In the online A/B test, the click-through rate improved by 50% whereas the conversion rate improved by 14% compared with the existing model. These results demonstrate the effectiveness of VLM-based encoders for e-commerce product recommendations and provide practical insights into the development of visual similarity-based recommendation systems.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13229v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13229v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>超越静态大语言模型策略：用于推荐的模仿增强强化学习
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Beyond Static LLM Policies: Imitation-Enhanced Reinforcement Learning for Recommendation
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi Zhang, Lili Xie, Ruihong Qiu, Jiajun Liu, Sen Wang
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何克服LLM直接作为推荐策略的延迟和幻觉问题。核心思想是利用模仿学习从LLM生成的轨迹中提取奖励模型，通过离线强化学习框架将LLM的语义理解转移到高效策略中，无需微调LLM。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM在推荐系统中的应用挑战，提出结合模仿学习和强化学习的新框架，完美契合核心关注点。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 07:28:29
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13229v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13229v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recommender systems (RecSys) have become critical tools for enhancing user engagement by delivering personalized content across diverse digital platforms. Recent advancements in large language models (LLMs) demonstrate significant potential for improving RecSys, primarily due to their exceptional generalization capabilities and sophisticated contextual understanding, which facilitate the generation of flexible and interpretable recommendations. However, the direct deployment of LLMs as primary recommendation policies presents notable challenges, including persistent latency issues stemming from frequent API calls and inherent model limitations such as hallucinations and biases. To address these issues, this paper proposes a novel offline reinforcement learning (RL) framework that leverages imitation learning from LLM-generated trajectories. Specifically, inverse reinforcement learning is employed to extract robust reward models from LLM demonstrations. This approach negates the need for LLM fine-tuning, thereby substantially reducing computational overhead. Simultaneously, the RL policy is guided by the cumulative rewards derived from these demonstrations, effectively transferring the semantic insights captured by the LLM. Comprehensive experiments conducted on two benchmark datasets validate the effectiveness of the proposed method, demonstrating superior performance when compared against state-of-the-art RL-based and in-context learning baselines. The code can be found at https://github.com/ArronDZhang/IL-Rec.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13217v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13217v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>LLM引导的分层检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            LLM-guided Hierarchical Retrieval
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nilesh Gupta, Wei-Cheng Chang, Ngot Bui, Cho-Jui Hsieh, Inderjit S. Dhillon
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究复杂查询下的高效文档检索问题，核心思想是通过构建语义层次树结构并设计LLM导航算法，实现对数复杂度的层次化检索。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对搜索系统中的核心检索问题，提出LLM引导的层次检索框架，属于LLM在搜索领域的直接应用创新，与关注点高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 07:05:17
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13217v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13217v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Modern IR systems are increasingly tasked with answering complex, multi-faceted queries that require deep reasoning rather than simple keyword or semantic matching. While LLM-based IR has shown great promise, the prevailing retrieve-then-rerank paradigm inherits the limitations of embedding-based retrieval; parametric generative approaches are difficult to update with new information; and long-context methods that place the entire corpus in context are computationally infeasible for large document collections. To address these challenges, we introduce LATTICE, a hierarchical retrieval framework that enables an LLM to reason over and navigate large corpora with logarithmic search complexity by imposing a semantic tree structure on the corpus. Our approach consists of two stages: (1) an offline phase that organizes the corpus into a semantic hierarchy via either a bottom-up agglomerative strategy or a top-down divisive strategy using multi-level summaries and (2) an online traversal phase where a search LLM navigates this tree. A central challenge in such LLM-guided search is that the model's relevance judgments are noisy, context-dependent, and unaware of the hierarchy, making cross-branch and cross-level comparisons difficult. To overcome this, we propose a traversal algorithm that estimates calibrated latent relevance scores from local LLM outputs and aggregates them into a global path relevance metric. Our training-free framework achieves state-of-the-art zero-shot performance on the reasoning-intensive BRIGHT benchmark, demonstrating up to 9% improvement in Recall@100 and 5% in nDCG@10 over the next best zero-shot baseline. Furthermore, compared to the fine-tuned SOTA method DIVER-v2, LATTICE attains comparable results on BRIGHT subsets that use a static corpus for evaluation.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13095v1.
     Header row: Chinese title linking to alphaXiv, plus the score badge.
     .paper-details: English title, authors, summary, recommendation reason, metadata row and collapsible abstract.
     Icon-font glyphs are marked aria-hidden so assistive technology skips them.
     NOTE(review): the star in the title appears to mark a selected paper; if so, consider adding visually hidden text. -->
<div class="paper-card p-4 expanded">
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13095v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>链式检索：自举大语言模型实现生成式检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Retrieval-in-the-Chain: Bootstrapping Large Language Models for Generative Retrieval
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yingchen zhang, Ruqing zhang, Jiafeng Guo, Wenjun Peng, Sen Li, Fuyu Lv
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究如何利用大语言模型的推理能力改进生成式检索；核心方法是提出结构化推理框架R4R，将自由形式推理转换为紧凑结构格式，并在检索过程中迭代优化推理。</p>
        </div>

        <!-- Personalised recommendation reason -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文将推理能力引入生成式检索，提出结构化推理迭代优化方法，直接提升检索性能，与搜索和推荐系统高度相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 02:29:10
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13095v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13095v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Generative retrieval (GR) is an emerging paradigm that leverages large language models (LLMs) to autoregressively generate document identifiers (docids) relevant to a given query. Prior works have focused on leveraging the generative capabilities of LLMs to improve GR, while overlooking that their reasoning capabilities could likewise help. This raises a key question: Can explicit reasoning benefit GR? To investigate, we first conduct a preliminary study where an LLM is prompted to generate free-form chain-of-thought (CoT) reasoning before performing constrained docid decoding. Although this method outperforms standard GR, the generated reasoning tends to be verbose and poorly aligned with the docid space. These limitations motivate the development of a reasoning mechanism better tailored to GR. Therefore, we propose Reason-for-Retrieval (R4R), a reasoning-augmented framework for GR that converts free-form CoT reasoning into a compact, structured format, and iteratively refines the reasoning during the retrieval process. R4R augments an existing GR method by leveraging a reasoning-capable LLM that has been instruction-tuned for GR. At inference time, R4R first uses the LLM to generate an initial structured reasoning; then the same LLM alternates between (i) constrained decoding with the chosen GR method to produce candidate docids and (ii) updating the reasoning based on retrieval results to improve the next round. R4R does not require additional models or training, and instead a single LLM serves as both the reasoning generator and the retriever. Extensive experiments on Natural Questions, MS MARCO, and a real-world item-search benchmark validate the effectiveness of R4R.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13602v1 (NOSA); score badge 9/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13602v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>NOSA：原生与可卸载稀疏注意力
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            NOSA: Native and Offloadable Sparse Attention
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuxiang Huang, Chaojun Xiao, Xu Han, Zhiyuan Liu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM长上下文处理中KV缓存导致的解码效率瓶颈问题，核心思想是通过分解令牌选择为查询感知和查询无关组件，在保持训练时注意力计算不变的前提下实现高效的KV缓存卸载。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对LLM推理效率瓶颈，提出了可训练稀疏注意力框架，通过KV缓存卸载技术显著提升解码吞吐量，与Transformer架构效率和LLM应用直接相关。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T14:33:16">2025-10-15 14:33:16</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13602v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13602v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Trainable sparse attention has emerged as a promising solution to address the decoding efficiency bottleneck of LLMs in long-context processing, significantly saving memory accesses while minimally impacting task performance. However, existing sparse attention methods leave a crucial limitation unresolved: the size of the key-value (KV) cache remains unreduced, which constrains on-GPU batch sizes and throttles decoding throughput, especially in large-scale batched inference. In this paper, we show that trainable sparse attention naturally exhibits strong locality in token selection across adjacent decoding steps, thereby enabling KV cache offloading without altering the underlying attention computation. However, the inherent locality remains insufficient to achieve efficient offloading, as the transfer of selected KV pairs between the CPU and GPU continues to dominate the overall decoding cost. Building on this insight, we present NOSA, a trainable sparse attention framework designed to natively support KV cache offloading. NOSA introduces explicit locality constraints by decomposing token selection into query-aware and query-agnostic components, thereby reducing KV transfers while preserving the same attention computation as used during training. We pretrain a 1B-parameter model with NOSA and conduct extensive benchmarks, showing that it preserves near-lossless performance while achieving up to a 2.3x improvement in decoding throughput compared with the vanilla trainable sparse attention baseline (InfLLM-V2).
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13554v1 (Attention Illuminates LLM Reasoning); score badge 9/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13554v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>注意力机制照亮LLM推理：预规划与锚定节奏实现细粒度策略优化
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Attention Illuminates LLM Reasoning: The Preplan-and-Anchor Rhythm Enables Fine-Grained Policy Optimization
        </div>

        <!-- Author list (already truncated with "..." in the generated page). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yang Li, Zhichen Dong, Yuhan Sun, Weixun Wang, Shaopan Xiong, Yijia Luo, Jiashun...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM推理过程中注意力机制的内在模式识别问题，核心思想是通过分析注意力头的局部与全局聚焦特征，发现预规划-锚点节奏机制，并基于此设计针对关键推理节点的强化学习信用分配策略。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文通过注意力机制揭示LLM推理的内在节奏，并提出基于关键节点的细粒度RL优化策略，直接关联Transformer架构分析和LLM在推理任务中的应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T13:49:51">2025-10-15 13:49:51</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13554v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13554v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The reasoning pattern of Large language models (LLMs) remains opaque, and Reinforcement learning (RL) typically applies uniform credit across an entire generation, blurring the distinction between pivotal and routine steps. This work positions attention as a privileged substrate that renders the internal logic of LLMs legible, not merely as a byproduct of computation, but as a mechanistic blueprint of reasoning itself. We first distinguish attention heads between locally and globally focused information processing and reveal that locally focused heads produce a sawtooth pattern near the diagonal indicating phrasal chunks, while globally focused heads expose tokens that exert broad downstream influence over future tokens. We formalize these with two metrics: 1) Windowed Average Attention Distance, which measures the extent of backward attention within a clipped window; 2) Future Attention Influence, which quantifies a token's global importance as the average attention it receives from subsequent tokens. Taken together, these signals reveal a recurring preplan-and-anchor mechanism, where the model first performs a long-range contextual reference to generate an introductory token, which is immediately followed by or coincides with a semantic anchor token that organizes subsequent reasoning. Leveraging these insights, we introduce three novel RL strategies that dynamically perform targeted credit assignment to critical nodes (preplan tokens, anchor tokens, and their temporal coupling) and show consistent performance gains across various reasoning tasks. By aligning optimization with the model's intrinsic reasoning rhythm, we aim to transform opaque optimization into an actionable structure-aware process, hoping to offer a potential step toward more transparent and effective optimization of LLM reasoning.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13334v1 (Taming the Fragility of KV Cache Eviction); score badge 9/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13334v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>驯服大语言模型推理中键值缓存驱逐的脆弱性
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Taming the Fragility of KV Cache Eviction in LLM Inference
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yuan Feng, Haoyu Guo, JunLin Lv, S. Kevin Zhou, Xike Xie
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究LLM推理中KV缓存驱逐的脆弱性问题，核心思想是通过两阶段线性时间防御性聚合策略控制最坏情况风险，提升缓存管理鲁棒性。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接解决LLM推理中的KV缓存效率问题，这是Transformer架构优化的核心挑战，对搜索和推荐系统的实时部署至关重要。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T09:18:58">2025-10-15 09:18:58</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13334v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13334v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models have revolutionized natural language processing, yet their deployment remains hampered by the substantial memory and runtime overhead of the transformer's Key-Value cache. To mitigate this, recent methods employ a scoring-aggregation framework to evict unimportant cache entries, based on the stability assumption-that a fixed subset of entries remains consistently important during generation. However, prior work has largely focused on refining importance indicators for scoring, while defaulting to mean aggregation due to a faithful trust in the stability assumption. In this work, we argue that this underlying assumption is inherently fragile, making mean aggregation highly vulnerable in extreme cases. To counter this, we propose a simple yet elegant defensive aggregation strategy: a two-step, linear-time approach that controls worst-case risk, thereby defending against extreme cases with negligible computational overhead. Embodying this strategy, we propose a novel cache eviction method, DefensiveKV and its extension, Layer-DefensiveKV, which incorporates layer-wise budget allocation. Across seven task domains (18 datasets), our methods reduce generation quality loss by 2.3x and 4.3x respectively, versus the strongest baseline under a 20% cache size. These results set new performance benchmarks and pioneer a promising direction for optimizing cache eviction against underlying fragility through worst-case risk management. Our code is available at https://github.com/FFY0/DefensiveKV.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13291v1 (Meituan WOWService technical report); score badge 9/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13291v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>更高满意度，更低成本：关于大语言模型如何革新美团智能交互系统的技术报告
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Higher Satisfaction, Lower Cost: A Technical Report on How LLMs Revolutionize Meituan's Intelligent Interaction Systems
        </div>

        <!-- Author list (already truncated with "..." in the generated page). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xuxin Cheng, Ke Zeng, Zhiquan Cao, Linyi Dai, Wenxuan Gao, Fei Han, Ai Jian, Fen...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何解决智能交互系统在工业应用中的核心挑战，核心方法是集成LLM和多智能体架构构建WOWService系统，通过数据构建、能力增强、场景适配、智能体协作和自动化评估等模块实现自主任务管理和协同问题解决。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接应用LLM技术构建智能交互系统，解决推荐/搜索领域的关键挑战，包括冷启动、多轮对话、业务规则演进和多智能体协作，与关注领域高度契合。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T08:35:51">2025-10-15 08:35:51</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13291v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13291v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Enhancing customer experience is essential for business success, particularly as service demands grow in scale and complexity. Generative artificial intelligence and Large Language Models (LLMs) have empowered intelligent interaction systems to deliver efficient, personalized, and 24/7 support. In practice, intelligent interaction systems encounter several challenges: (1) Constructing high-quality data for cold-start training is difficult, hindering self-evolution and raising labor costs. (2) Multi-turn dialogue performance remains suboptimal due to inadequate intent understanding, rule compliance, and solution extraction. (3) Frequent evolution of business rules affects system operability and transferability, constraining low-cost expansion and adaptability. (4) Reliance on a single LLM is insufficient in complex scenarios, where the absence of multi-agent frameworks and effective collaboration undermines process completeness and service quality. (5) The open-domain nature of multi-turn dialogues, lacking unified golden answers, hampers quantitative evaluation and continuous optimization. To address these challenges, we introduce WOWService, an intelligent interaction system tailored for industrial applications. With the integration of LLMs and multi-agent architectures, WOWService enables autonomous task management and collaborative problem-solving. Specifically, WOWService focuses on core modules including data construction, general capability enhancement, business scenario adaptation, multi-agent coordination, and automated evaluation. Currently, WOWService is deployed on the Meituan App, achieving significant gains in key metrics, e.g., User Satisfaction Metric 1 (USM 1) -27.53% and User Satisfaction Metric 2 (USM 2) +25.51%, demonstrating its effectiveness in capturing user needs and advancing personalized service.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13079v1 (GatePro); score badge 9/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13079v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>GatePro：面向专家混合模型的免参数专家选择优化
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>9/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            GatePro: Parameter-Free Expert Selection Optimization for Mixture-of-Experts Models
        </div>

        <!-- Author list (already truncated with "..." in the generated page). -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chen Zheng, Yuhang Cai, Deyi Liu, Jin Ma, Yiyuan Ma, Yuan Yang, Jing Liu, Yutao ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究MoE模型中专家功能冗余导致计算浪费和有效容量受限的问题。核心方法是识别最相似专家对并引入局部竞争机制，防止冗余专家同时激活，同时保持专家自然专业化。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文针对MoE架构的核心效率问题提出参数免费优化方法，直接提升专家多样性，对Transformer架构效率和LLM规模化具有重要价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T01:47:45">2025-10-15 01:47:45</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13079v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13079v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Modern large language models leverage Mixture-of-Experts (MoE) architectures for efficient scaling, but face a critical challenge: functionally similar experts are often selected simultaneously, creating redundant computation and limiting effective model capacity. Existing auxiliary balance loss methods improve token distribution but fail to address the underlying expert diversity problem. We introduce GatePro, a novel parameter-free method that directly promotes expert selection diversity. GatePro identifies the most similar expert pairs and introduces localized competition mechanisms, preventing redundant expert co-activation while maintaining natural expert specialization. Our comprehensive evaluation demonstrates GatePro's effectiveness across model scales and benchmarks. Analysis demonstrates GatePro's ability to achieve enhanced expert diversity, where experts develop more distinct and complementary capabilities, avoiding functional redundancy. This approach can be deployed hot-swappable during any training phase without additional learnable parameters, offering a practical solution for improving MoE effectiveness.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13590v1 (RAG Meets Temporal Graphs); score badge 8/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13590v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>RAG与时间图交汇：面向演化知识的时序敏感建模与检索
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RAG Meets Temporal Graphs: Time-Sensitive Modeling and Retrieval for Evolving Knowledge
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiale Han, Austin Cheung, Yubai Wei, Zheng Yu, Xusheng Wang, Bing Zhu, Yi Yang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究RAG系统如何处理时间敏感知识演化的核心问题，核心方法是构建包含时序知识图和层次时间图的双层次时序图结构，通过多粒度时间摘要和增量更新机制来区分不同时间的相同事实。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对RAG系统的时间感知问题，提出了双层次时序图建模方法，对搜索和推荐系统中的动态知识更新具有重要应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T14:21:08">2025-10-15 14:21:08</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13590v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13590v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Knowledge is inherently time-sensitive and continuously evolves over time. Although current Retrieval-Augmented Generation (RAG) systems enrich LLMs with external knowledge, they largely ignore this temporal nature. This raises two challenges for RAG. First, current RAG methods lack effective time-aware representations. Same facts of different time are difficult to distinguish with vector embeddings or conventional knowledge graphs. Second, most RAG evaluations assume a static corpus, leaving a blind spot regarding update costs and retrieval stability as knowledge evolves. To make RAG time-aware, we propose Temporal GraphRAG (TG-RAG), which models external corpora as a bi-level temporal graph consisting of a temporal knowledge graph with timestamped relations and a hierarchical time graph. Multi-granularity temporal summaries are generated for each time node to capture both key events and broader trends at that time. The design supports incremental updates by extracting new temporal facts from the incoming corpus and merging them into the existing graph. The temporal graph explicitly represents identical facts at different times as distinct edges to avoid ambiguity, and the time hierarchy graph allows only generating reports for new leaf time nodes and their ancestors, ensuring effective and efficient updates. During inference, TG-RAG dynamically retrieves a subgraph within the temporal and semantic scope of the query, enabling precise evidence gathering. Moreover, we introduce ECT-QA, a time-sensitive question-answering dataset featuring both specific and abstract queries, along with a comprehensive evaluation protocol designed to assess incremental update capabilities of RAG systems. Extensive experiments show that TG-RAG significantly outperforms existing baselines, demonstrating the effectiveness of our method in handling temporal knowledge and incremental updates.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13193v1 (ReMindRAG); score badge 8/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13193v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>ReMindRAG：面向高效检索增强生成的低成本大语言模型引导知识图谱遍历
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ReMindRAG: Low-Cost LLM-Guided Knowledge Graph Traversal for Efficient RAG
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yikuan Hu, Jifeng Zhu, Lanrui Tang, Chen Huang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究知识图谱增强检索生成(RAG)系统中效率与效果难以兼顾的问题，核心思想是采用LLM引导的图遍历策略，通过节点探索、利用和记忆回放机制，并在图边嵌入中存储遍历经验以实现免训练的记忆功能。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出LLM引导的知识图谱遍历方法，直接应用于搜索和推荐系统的检索增强生成(RAG)领域，同时关注Transformer架构的效率优化问题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T06:31:29">2025-10-15 06:31:29</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13193v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13193v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Knowledge graphs (KGs), with their structured representation capabilities, offer promising avenue for enhancing Retrieval Augmented Generation (RAG) systems, leading to the development of KG-RAG systems. Nevertheless, existing methods often struggle to achieve effective synergy between system effectiveness and cost efficiency, leading to neither unsatisfying performance nor excessive LLM prompt tokens and inference time. To this end, this paper proposes REMINDRAG, which employs an LLM-guided graph traversal featuring node exploration, node exploitation, and, most notably, memory replay, to improve both system effectiveness and cost efficiency. Specifically, REMINDRAG memorizes traversal experience within KG edge embeddings, mirroring the way LLMs "memorize" world knowledge within their parameters, but in a train-free manner. We theoretically and experimentally confirm the effectiveness of REMINDRAG, demonstrating its superiority over existing baselines across various benchmark datasets and LLM backbones. Our code is available at https://github.com/kilgrims/ReMindRAG.
            </div>
        </details>
    </div>
</div>
<!-- Paper card for arXiv:2510.13799v1 (BRIEF-Pro); score badge 8/10. -->
<div class="paper-card p-4 expanded">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13799v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>BRIEF-Pro：通过短到长合成实现通用上下文压缩，用于快速准确的多跳推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            BRIEF-Pro: Universal Context Compression with Short-to-Long Synthesis for Fast and Accurate Multi-Hop Reasoning
        </div>

        <!-- Author list. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jia-Chen Gu, Junyi Zhang, Di Wu, Yuankai Li, Kai-Wei Chang, Nanyun Peng
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究检索增强生成中长上下文导致的延迟和认知负载问题，核心方法是训练通用轻量级压缩器，通过短上下文训练实现长文档的抽象压缩，并支持用户自定义摘要长度。</p>
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的上下文压缩技术直接解决检索增强生成中的延迟和认知负载问题，对搜索和推荐系统中的长文档处理具有直接应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv abstract link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The page gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T17:57:45">2025-10-15 17:57:45</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13799v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13799v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As retrieval-augmented generation (RAG) tackles complex tasks, increasingly expanded contexts offer richer information, but at the cost of higher latency and increased cognitive load on the model. To mitigate this bottleneck, especially for intricate multi-hop questions, we introduce BRIEF-Pro. It is a universal, lightweight compressor that distills relevant evidence for a given query from retrieved documents into a concise summary for seamless integration into in-context RAG. Using seed data consisting of relatively short contexts (fewer than 1k words), BRIEF-Pro is trained to perform abstractive compression of extended contexts exceeding 10k words across a wide range of scenarios. Furthermore, BRIEF-Pro offers flexible user control over summary length by allowing users to specify the desired number of sentences. Experiments on four open-domain multi-hop question-answering datasets show that BRIEF-Pro generates more concise and relevant summaries, enhancing performance across small, large, and proprietary language models. With the 70B reader model, 32x compression by BRIEF-Pro improves QA performance by 4.67% on average over LongLLMLingua's 9x, while requiring only 23% of its computational overhead.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13797v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13797v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>面包屑推理：基于压缩信标的内存高效推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Breadcrumbs Reasoning: Memory-Efficient Reasoning with Compression Beacons
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Giovanni Monea, Yair Feldman, Shankar Padmanabhan, Kianté Brantley, Yoav Artzi
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究Transformer模型长上下文推理中的内存效率问题，核心思想是通过学习特殊压缩标记周期性压缩生成过程中的KV缓存，利用信息价值递减特性实现内存优化。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出KV缓存压缩方法直接解决Transformer架构的效率瓶颈，属于Transformer技术进展的核心领域，对推荐系统和搜索中的长序列处理有重要应用价值。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T17:57:21">2025-10-15 17:57:21</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13797v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13797v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                The scalability of large language models for long-context reasoning is severely constrained by the linear growth of their Transformer key-value cache, which incurs significant memory and computational costs. We posit that as a model generates reasoning tokens, the informational value of past generated tokens diminishes, creating an opportunity for compression. In this work, we propose to periodically compress the generation KV cache with a learned, special-purpose token and evict compressed entries. We train the model to perform this compression via a modified joint distillation and reinforcement learning (RL) framework. Our training method minimizes overhead over the conventional RL process, as it leverages RL outputs for distillation. Empirically, our method achieves a superior memory-accuracy Pareto frontier compared to both the model without cache compression and training-free compression techniques.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13721v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13721v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>NExT-OMNI：基于离散流匹配实现任意模态到任意模态的全模态基础模型
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            NExT-OMNI: Towards Any-to-Any Omnimodal Foundation Models with Discrete Flow Matching
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Run Luo, Xiaobo Xia, Lu Wang, Longze Chen, Renke Shan, Jing Luo, Min Yang, Tat-S...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">研究下一代多模态基础模型如何实现任意模态间的统一理解与生成；核心方法是采用离散流范式，通过度量诱导概率路径和动力学最优速度实现统一建模，避免传统自回归架构的理解与生成能力不平衡问题。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出的离散流匹配统一建模方法可直接应用于推荐系统的多模态理解与生成，其任意到任意跨模态能力与异构数据处理理念高度契合搜索推荐场景。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T16:25:18">2025-10-15 16:25:18</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13721v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13721v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Next-generation multimodal foundation models capable of any-to-any cross-modal generation and multi-turn interaction will serve as core components of artificial general intelligence systems, playing a pivotal role in human-machine interaction. However, most existing multimodal models remain constrained by autoregressive architectures, whose inherent limitations prevent a balanced integration of understanding and generation capabilities. Although hybrid and decoupling strategies have been explored to address these tasks within unified frameworks separately, their redundant, non-integrated designs limit their applicability to broader scenarios, such as cross-modal retrieval. In this work, we introduce NExT-OMNI, an open-source omnimodal foundation model that achieves unified modeling through discrete flow paradigms. By leveraging metric-induced probability paths and kinetic optimal velocities, NExT-OMNI natively supports any-to-any understanding and generation with enhanced response efficiency, while enabling broader application scenarios through concise unified representations rather than task-decoupled designs. Trained on large-scale interleaved text, image, video, and audio data, NExT-OMNI delivers competitive performance on multimodal generation and understanding benchmarks, while outperforming prior unified models in multi-turn multimodal interaction and cross-modal retrieval, highlighting its architectural advantages as a next-generation multimodal foundation model. To advance further research, we release training details, data protocols, and open-source both the code and model checkpoints.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13329v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13329v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>基于嵌入的情境感知重排序器
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Embedding-Based Context-Aware Reranker
        </div>

        <!-- Authors -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ye Yuan, Mohammad Amin Shabani, Siqi Liu
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究检索增强生成系统中跨段落推理的挑战，核心方法是基于嵌入的轻量级重排序框架，通过结构信息和混合注意力机制增强跨段落理解。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文针对检索系统中跨段落推理的核心挑战，提出轻量级重排序框架，直接应用于搜索和推荐系统的核心环节。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T09:14:04">2025-10-15 09:14:04</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13329v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13329v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Retrieval-Augmented Generation (RAG) systems rely on retrieving relevant evidence from a corpus to support downstream generation. The common practice of splitting a long document into multiple shorter passages enables finer-grained and targeted information retrieval. However, it also introduces challenges when a correct retrieval would require inference across passages, such as resolving coreference, disambiguating entities, and aggregating evidence scattered across multiple sources. Many state-of-the-art (SOTA) reranking methods, despite utilizing powerful large pretrained language models with potentially high inference costs, still neglect the aforementioned challenges. Therefore, we propose Embedding-Based Context-Aware Reranker (EBCAR), a lightweight reranking framework operating directly on embeddings of retrieved passages with enhanced cross-passage understandings through the structural information of the passages and a hybrid attention mechanism, which captures both high-level interactions across documents and low-level relationships within each document. We evaluate EBCAR against SOTA rerankers on the ConTEB benchmark, demonstrating its effectiveness for information retrieval requiring cross-passage inference and its advantages in both accuracy and efficiency.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13191v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13191v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>基于上下文归一化的检索增强生成长文本推理
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Grounding Long-Context Reasoning with Contextual Normalization for Retrieval-Augmented Generation
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiamin Chen, Yuchen Li, Xinyu Ma, Xinran Chen, Xiaokun Zhang, Shuaiqiang Wang, C...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究检索增强生成中上下文格式对模型推理性能的影响问题，核心思想是上下文呈现方式（如分隔符、结构标记）会显著影响模型表现，并提出上下文归一化方法来标准化上下文表示以提升长上下文推理的鲁棒性。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文直接针对检索增强生成中的上下文表示问题，提出了上下文归一化方法，对搜索和推荐系统中的内容呈现方式有重要启示。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T06:28:25">2025-10-15 06:28:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13191v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13191v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Retrieval-Augmented Generation (RAG) has become an essential approach for extending the reasoning and knowledge capacity of large language models (LLMs). While prior research has primarily focused on retrieval quality and prompting strategies, the influence of how the retrieved documents are framed, i.e., context format, remains underexplored. We show that seemingly superficial choices, such as delimiters or structural markers in key-value extraction, can induce substantial shifts in accuracy and stability, even when semantic content is identical. To systematically investigate this effect, we design controlled experiments that vary context density, delimiter styles, and positional placement, revealing the underlying factors that govern performance differences. Building on these insights, we introduce Contextual Normalization, a lightweight strategy that adaptively standardizes context representations before generation. Extensive experiments on both controlled and real-world RAG benchmarks across diverse settings demonstrate that the proposed strategy consistently improves robustness to order variation and strengthens long-context utilization. These findings underscore that reliable RAG depends not only on retrieving the right content, but also on how that content is presented, offering both new empirical evidence and a practical technique for better long-context reasoning.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13170v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13170v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>戴上思考帽：从人类推理机制视角审视思维链微调的研究综述
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Putting on the Thinking Hats: A Survey on Chain of Thought Fine-tuning from the Perspective of Human Reasoning Mechanism
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiaoshu Chen, Sihang Zhou, Ke Liang, Duanyang Yuan, Haoyuan Chen, Xiaoyu Sun, Li...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">论文研究如何从人类推理机制视角系统分析链式思维微调技术，核心思想是借鉴六顶思考帽框架将CoT微调方法按人类思维模式进行分类，为开发类人推理的LLM提供理论指导。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文从人类推理机制角度系统分析CoT微调，直接关联LLM推理能力提升，对推荐和搜索中的复杂推理任务具有重要启发价值。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T05:54:13">2025-10-15 05:54:13</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13170v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13170v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The upstream abstract carried a raw LaTeX \footnote{...}; the URL is shown in parentheses instead. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Chain of thought (CoT) fine-tuning aims to endow large language models (LLMs) with reasoning capabilities by training them on curated reasoning traces. It leverages both supervised and reinforced fine-tuning to cultivate human-like reasoning skills in LLMs, including detailed planning, divergent thinking, intuitive judgment, timely reflection, internal thinking, and fact perception, etc. As CoT fine-tuning has advanced, LLMs have demonstrated substantial improvements in tasks such as mathematical reasoning and code generation. However, existing surveys about CoT fine-tuning primarily focus on technical aspects and overlook a systematic analysis from the perspective of human reasoning mechanisms. Given that the ultimate goal of CoT fine-tuning is to enable LLMs to reason like humans, it is crucial to investigate this technique through the lens of human cognition. To fill this gap, we present the first comprehensive survey of CoT fine-tuning grounded in human reasoning theory. Specifically, inspired by the well-known Six Thinking Hats framework, which systematically characterizes common human thinking modes using six metaphorical hats, we classify and examine CoT fine-tuning methods through this lens. Furthermore, building upon this theory, we outline potential directions for future research in CoT fine-tuning. In addition, we compile a comprehensive overview of existing datasets and model performances, and a real-time GitHub repository (https://github.com/AI-Chen/Awesome-CoT-Finetuning) that continuously tracks recent advances in this area is maintained. We hope this survey will serve as a valuable resource to inspire innovation and foster progress in this rapidly evolving field.
            </div>
        </details>
    </div>
</div>
<div class="paper-card p-4 expanded">
    <!-- Paper card for arXiv:2510.13161v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-2">
        <h3 class="text-lg font-semibold text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13161v1" target="_blank" rel="noopener noreferrer">
                <i class="fa fa-star text-yellow-400 mr-1" aria-hidden="true"></i>镜像推测解码：打破大语言模型推理中的串行障碍
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>8/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            Mirror Speculative Decoding: Breaking the Serial Barrier in LLM Inference
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nikhil Bhendawade, Kumari Nishu, Arnav Kundu, Chris Bartels, Minsik Cho, Irina B...
        </div>

        <!-- Core summary -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-lightbulb-o text-yellow-500 mr-1" aria-hidden="true"></i>核心总结:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究如何突破LLM推理中的串行瓶颈问题。核心思想是通过并行异构执行架构，让草稿模型和目标模型同时进行推测计算，并引入多令牌推测流技术，实现低延迟下的高接受率推理加速。</p>
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种突破LLM推理串行瓶颈的新方法，通过并行异构执行和多令牌推测流技术，直接提升LLM推理效率，对搜索和推荐系统的实时响应至关重要。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T05:22:57">2025-10-15 05:22:57</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13161v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13161v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Speculative decoding accelerates LLM inference by using a draft model to look ahead, but gains are capped by the cost of autoregressive draft generation: increasing draft size elevates acceptance rates but introduces additional latency overhead exacerbating the speed-accuracy tradeoff. Prior methods (Medusa, Hydra, EAGLE) partially reduce draft cost but either degrade acceptance or introduce overheads that limit scaling. We present Mirror Speculative Decoding (Mirror-SD), an inference algorithm that breaks the latency-acceptance tradeoff. Mirror-SD launches branch-complete rollouts from early-exit signals in parallel with the target model's suffix and explicitly maps computation across heterogeneous accelerators (GPU and NPU) to exploit cross-device parallelism. The draft speculates forward continuations for the target to verify, while the target simultaneously speculates correction paths for the draft, converting speculation into two complementary execution pipelines. To further cut draft latency without weakening acceptance semantics, we add speculative streaming so the draft emits multiple tokens per step. This dual strategy of parallel heterogeneous execution plus multi-token speculative streaming pushes speculative decoding toward its ideal regime of high acceptance with low overhead. On SpecBench with server-scale models from 14B to 66B parameters, Mirror-SD delivers consistent end-to-end gains, achieving 2.8x-5.8x wall-time speedups across diverse tasks and a 30% average relative improvement over the strongest baseline, EAGLE3.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Compact paper card for arXiv:2510.13537v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- The page's inline rule `.collapsed-level-1 .paper-details { display: none; }` hides the details block below. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13537v1" target="_blank" rel="noopener noreferrer">
                K-Merge：面向设备端大语言模型的适配器在线持续合并方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>7/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            K-Merge: Online Continual Merging of Adapters for On-device Large Language Models
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Donald Shenaj, Ondrej Bohdal, Taha Ceritli, Mete Ozay, Pietro Zanuttigh, Umberto...
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文属于'使能LLM技术'范畴，专注于设备端LLM的高效适配器管理。在推荐系统和搜索场景中，这种在线持续合并技术可以支持个性化模型在移动设备上的高效更新，实现用户偏好的实时适应，同时保持模型效率。这对于移动端推荐和搜索应用的个性化体验优化具有直接应用价值。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T13:32:25">2025-10-15 13:32:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13537v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13537v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                On-device deployment of Large Language Models (LLMs) frequently leverages Low-Rank Adapters (LoRAs) to support diverse downstream tasks under tight resource constraints. To address the limited storage capacity of mobile devices, recent works have explored model merging techniques to fuse multiple LoRAs into a single one. In practice, however, LoRAs are often delivered incrementally, as users request support for new tasks (e.g., novel problem types or languages). This scenario introduces a new challenge: on-device online continual merging, where the objective is to incorporate new LoRAs while preserving the performance on previously supported tasks. In this paper, we propose a data-free and computationally efficient strategy for selecting and merging LoRAs when a new one becomes available, assuming the device can store only a limited number of adapters. Extensive experiments across real-world tasks demonstrate the superiority of our approach compared to alternative strategies while adhering to the storage budget and compute limitations of on-device settings.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Compact paper card for arXiv:2510.13796v1: the title links to alphaXiv, the footer links to arXiv. -->
    <!-- The page's inline rule `.collapsed-level-1 .paper-details { display: none; }` hides the details block below. -->
    <!-- Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13796v1" target="_blank" rel="noopener noreferrer">
                语言模型中符号接地的机制性涌现
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title -->
        <div class="mb-2 text-base text-gray-700">
            The Mechanistic Emergence of Symbol Grounding in Language Models
        </div>

        <!-- Authors (list arrives already truncated with "...") -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shuyu Wu, Ziqiao Ma, Xiaoxi Luo, Yidong Huang, Josue Torres-Fonseca, Freda Shi, ...
        </div>

        <!-- Personalized recommendation rationale -->
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文探讨语言模型中符号接地的机制性涌现，这属于核心LLM技术的基础进展。符号接地对于推荐和搜索系统至关重要，因为它能增强模型对用户查询和商品特征的理解能力，从而提高语义匹配的准确性。这种基础理解能力的提升可以直接应用于改善搜索相关性排序和推荐系统的语义理解模块。</p>
        </div>

        <!-- Footer row: timestamp, arXiv link, category tags -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- Source gives no time zone, so datetime is a local date-time without an offset. -->
            <time datetime="2025-10-15T17:56:15">2025-10-15 17:56:15</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13796v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13796v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract, behind a native disclosure widget -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Symbol grounding (Harnad, 1990) describes how symbols such as words acquire their meanings by connecting to real-world sensorimotor experiences. Recent work has shown preliminary evidence that grounding may emerge in (vision-)language models trained at scale without using explicit grounding objectives. Yet, the specific loci of this emergence and the mechanisms that drive it remain largely unexplored. To address this problem, we introduce a controlled evaluation framework that systematically traces how symbol grounding arises within the internal computations through mechanistic and causal analysis. Our findings show that grounding concentrates in middle-layer computations and is implemented through the aggregate mechanism, where attention heads aggregate the environmental ground to support the prediction of linguistic forms. This phenomenon replicates in multimodal dialogue and across architectures (Transformers and state-space models), but not in unidirectional LSTMs. Our results provide behavioral and mechanistic evidence that symbol grounding can emerge in language models, with practical implications for predicting and potentially controlling the reliability of generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13366v1" target="_blank" rel="noopener noreferrer">
                大语言模型时代的文档智能：综述
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Document Intelligence in the Era of Large Language Models: A Survey
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Weishi Wang, Hengchang Hu, Zhijie Zhang, Zhaochen Li, Hongxin Shao, Daniel Dahlm...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该综述涵盖文档智能技术，在搜索系统中具有直接应用价值，如文档理解、信息提取和检索增强。虽然不专门针对推荐或广告，但文档智能技术可以增强搜索系统的内容理解和用户查询处理能力。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 09:57:03
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13366v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13366v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Document AI (DAI) has emerged as a vital application area, and is significantly transformed by the advent of large language models (LLMs). While earlier approaches relied on encoder-decoder architectures, decoder-only LLMs have revolutionized DAI, bringing remarkable advancements in understanding and generation. This survey provides a comprehensive overview of DAI's evolution, highlighting current research attempts and future prospects of LLMs in this field. We explore key advancements and challenges in multimodal, multilingual, and retrieval-augmented DAI, while also suggesting future research directions, including agent-based approaches and document-specific foundation models. This paper aims to provide a structured analysis of the state-of-the-art in DAI and its implications for both academic and practical applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13285v1" target="_blank" rel="noopener noreferrer">
                分布内引导：在语言模型生成中平衡控制与连贯性
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            In-Distribution Steering: Balancing Control and Coherence in Language Model Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Arthur Vogels, Benjamin Wong, Yann Choho, Annabelle Blangero, Milan Bhan
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文属于'使能LLM技术'范畴，专注于语言模型生成控制技术。在推荐系统、搜索和广告领域，这种控制技术可以应用于生成更符合业务目标的推荐理由、搜索摘要或广告文案，同时保持内容的连贯性和自然性，对提升用户体验和商业效果具有直接应用价值。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 08:31:37
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13285v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13285v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Activation steering methods control large language model (LLM) behavior by modifying internal activations at inference time. However, most existing activation steering methods rely on a fixed steering strength, leading to either insufficient control or unadapted intervention that degrades text plausibility and coherence. We introduce In-Distribution Steering (IDS), a novel method that adapts steering strength based on the input data distribution in representation space. IDS dynamically adjusts interventions according to how far a given input lies within the distribution, enabling adaptive intervention and generation stability during text generation. Experiments demonstrate that IDS achieves strong accuracy on classification tasks while producing coherent text without collapse, making IDS particularly well suited for real-world applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13331v1" target="_blank" rel="noopener noreferrer">
                向量量化模型中自扩展码书的组优化方法
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Group-Wise Optimization for Self-Extensible Codebooks in Vector Quantized Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hong-Kai Zheng, Piji Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及向量量化模型的码书优化技术，属于Transformer架构效率改进的范畴。在推荐系统中，向量量化可用于高效的嵌入表示和检索，自扩展码书技术可提升模型对动态用户兴趣和物品特征的适应性，从而改善个性化推荐效果。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 09:14:22
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13331v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13331v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vector Quantized Variational Autoencoders (VQ-VAEs) leverage self-supervised learning through reconstruction tasks to represent continuous vectors using the closest vectors in a codebook. However, issues such as codebook collapse persist in the VQ model. To address these issues, existing approaches employ implicit static codebooks or jointly optimize the entire codebook, but these methods constrain the codebook's learning capability, leading to reduced reconstruction quality. In this paper, we propose Group-VQ, which performs group-wise optimization on the codebook. Each group is optimized independently, with joint optimization performed within groups. This approach improves the trade-off between codebook utilization and reconstruction performance. Additionally, we introduce a training-free codebook resampling method, allowing post-training adjustment of the codebook size. In image reconstruction experiments under various settings, Group-VQ demonstrates improved performance on reconstruction metrics. And the post-training codebook sampling method achieves the desired flexibility in adjusting the codebook size.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13131v1" target="_blank" rel="noopener noreferrer">
                OS-HGAdapter：面向大语言模型辅助熵增强图文对齐的开放语义超图适配器
            </a>
        </h3>
        <span class="score-badge bg-green-100 text-green-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>6/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            OS-HGAdapter: Open Semantic Hypergraph Adapter for Large Language Models Assisted Entropy-Enhanced Image-Text Alignment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rongjun Chen, Chengsi Yao, Jinchang Ren, Xianxian Zeng, Peixian Wang, Jun Yuan, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及多模态对齐（图文对齐）和LLM适配器技术，这与VLM类比异质数据建模相关，可能应用于搜索中的跨模态检索或推荐中的多模态内容理解。然而，其核心焦点是视觉-语言对齐而非纯粹的推荐/搜索/广告排名问题，因此相关性中等。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 04:09:00
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13131v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13131v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.MM</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- LaTeX "\%" escapes from the source abstract are written as plain "%" so no backslash is displayed. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-image alignment constitutes a foundational challenge in multimedia content understanding, where effective modeling of cross-modal semantic correspondences critically enhances retrieval system performance through joint embedding space optimization. Given the inherent difference in information entropy between texts and images, conventional approaches often show an imbalance in the mutual retrieval of these two modalities. To address this particular challenge, we propose to use the open semantic knowledge of Large Language Model (LLM) to fill for the entropy gap and reproduce the alignment ability of humans in these tasks. Our entropy-enhancing alignment is achieved through a two-step process: 1) a new prompt template that does not rely on explicit knowledge in the task domain is designed to use LLM to enhance the polysemy description of the text modality. By analogy, the information entropy of the text modality relative to the visual modality is increased; 2) A hypergraph adapter is used to construct multilateral connections between the text and image modalities, which can correct the positive and negative matching errors for synonymous semantics in the same fixed embedding space, whilst reducing the noise caused by open semantic entropy by mapping the reduced dimensions back to the original dimensions. Comprehensive evaluations on the Flickr30K and MS-COCO benchmarks validate the superiority of our Open Semantic Hypergraph Adapter (OS-HGAdapter), showcasing 16.8% (text-to-image) and 40.1% (image-to-text) cross-modal retrieval gains over existing methods while establishing new state-of-the-art performance in semantic alignment tasks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13614v1" target="_blank" rel="noopener noreferrer">
                MemoTime：记忆增强时序知识图谱增强的大型语言模型推理
            </a>
        </h3>
        <span class="score-badge bg-blue-100 text-blue-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>4/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MemoTime: Memory-Augmented Temporal Knowledge Graph Enhanced Large Language Model Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xingyu Tan, Xiaoyang Wang, Xiwei Xu, Xin Yuan, Liming Zhu, Wenjie Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注知识图谱增强的LLM推理，属于LLM技术使能范畴。时序知识图谱在推荐系统中具有潜在应用价值，可用于建模用户行为序列和动态兴趣演化，但论文标题未明确指向搜索、推荐或广告领域的直接应用，因此相关性中等。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 14:43:31
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13614v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13614v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) have achieved impressive reasoning abilities, but struggle with temporal understanding, especially when questions involve multiple entities, compound operators, and evolving event sequences. Temporal Knowledge Graphs (TKGs), which capture vast amounts of temporal facts in a structured format, offer a reliable source for temporal reasoning. However, existing TKG-based LLM reasoning methods still struggle with four major challenges: maintaining temporal faithfulness in multi-hop reasoning, achieving multi-entity temporal synchronization, adapting retrieval to diverse temporal operators, and reusing prior reasoning experience for stability and efficiency. To address these issues, we propose MemoTime, a memory-augmented temporal knowledge graph framework that enhances LLM reasoning through structured grounding, recursive reasoning, and continual experience learning. MemoTime decomposes complex temporal questions into a hierarchical Tree of Time, enabling operator-aware reasoning that enforces monotonic timestamps and co-constrains multiple entities under unified temporal bounds. A dynamic evidence retrieval layer adaptively selects operator-specific retrieval strategies, while a self-evolving experience memory stores verified reasoning traces, toolkit decisions, and sub-question embeddings for cross-type reuse. Comprehensive experiments on multiple temporal QA benchmarks show that MemoTime achieves overall state-of-the-art results, outperforming the strong baseline by up to 24.0%. Furthermore, MemoTime enables smaller models (e.g., Qwen3-4B) to achieve reasoning performance comparable to that of GPT-4-Turbo.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13312v1" target="_blank" rel="noopener noreferrer">
                ChatR1：用于对话推理和检索增强问答的强化学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            ChatR1: Reinforcement Learning for Conversational Reasoning and Retrieval Augmented Question Answering
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Simon Lupart, Mohammad Aliannejadi, Evangelos Kanoulas
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及检索增强和对话推理，这些技术可能应用于搜索系统，但核心焦点是强化学习在问答中的应用。根据指导原则，没有明确展示与推荐系统、搜索或广告相关性的强化学习论文应被视为低相关性。检索增强组件具有潜在搜索应用，但强化学习主导使其相关性有限。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 09:00:20
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13312v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13312v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.IR</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The LaTeX-style backtick/apostrophe quote pair from the source abstract is written as real quotation marks. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present ChatR1, a reasoning framework based on reinforcement learning (RL) for conversational question answering (CQA). Reasoning plays an important role in CQA, where user intent evolves across dialogue turns, and utterances are often underspecified, requiring contextual interpretation, query reformulation, and dynamic coordination between retrieval and generation. Unlike static ‘rewrite, retrieve, and generate’ pipelines, ChatR1 interleaves search and reasoning across turns, enabling exploratory and adaptive behaviors learned through RL. To address the challenge of sparse and delayed rewards in RL, we propose an intent-aware reward that provides turn-level feedback by aligning retrieval and reasoning with evolving user goals. Our proposed ChatR1 demonstrates strong performance on both 3B and 7B model backbones, outperforming competitive models on five CQA datasets, measured by different metrics (F1, BERTScore, and LLM-as-judge). We include a diverse set of CQA datasets to cover topic shifts, evolving intents, mixed-initiative dialogues, and multi-document grounding, testing ChatR1's performance from various aspects. Ablation studies confirm the effectiveness of the intent-aware reward. Our analyses further reveal diverse reasoning trajectories and effective use of the search tool. ChatR1 also generalizes robustly across domains, demonstrating that RL-based reasoning enables more flexible and context-sensitive behavior than static CQA pipelines.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13750v1" target="_blank" rel="noopener noreferrer">
                基于置信度的响应弃权：通过基于激活的不确定性估计提升大语言模型可信度
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Confidence-Based Response Abstinence: Improving LLM Trustworthiness via Activation-Based Uncertainty Estimation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhiqi Huang, Vivek Datla, Chenyang Zhu, Alfy Samuel, Daben Liu, Anoop Kumar, Rit...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM可信度和不确定性估计，属于LLM评估和可靠性范畴。虽然不确定性估计在技术上可能对推荐系统或搜索中的置信度校准有潜在应用，但论文焦点更偏向NLP-centric的评估和可信度问题，而非直接的RecSys/Search/Ads应用或核心架构改进。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 16:55:56
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13750v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13750v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We propose a method for confidence estimation in retrieval-augmented generation (RAG) systems that aligns closely with the correctness of large language model (LLM) outputs. Confidence estimation is especially critical in high-stakes domains such as finance and healthcare, where the cost of an incorrect answer outweighs that of not answering the question. Our approach extends prior uncertainty quantification methods by leveraging raw feed-forward network (FFN) activations as auto-regressive signals, avoiding the information loss inherent in token logits and probabilities after projection and softmax normalization. We model confidence prediction as a sequence classification task, and regularize training with a Huber loss term to improve robustness against noisy supervision. Applied in a real-world financial industry customer-support setting with complex knowledge bases, our method outperforms strong baselines and maintains high accuracy under strict latency constraints. Experiments on Llama 3.1 8B model show that using activations from only the 16th layer preserves accuracy while reducing response latency. Our results demonstrate that activation-based confidence modeling offers a scalable, architecture-aware path toward trustworthy RAG deployment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view the configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: title link (alphaXiv) and score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13626v1" target="_blank" rel="noopener noreferrer">
                LIBERO-Plus：视觉-语言-动作模型的深度鲁棒性分析
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Detail section: hidden by the page's `.collapsed-level-1 .paper-details` rule. -->
    <div class="paper-details">
        <!-- English content inside a zh-CN page is marked with lang="en". -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LIBERO-Plus: In-depth Robustness Analysis of Vision-Language-Action Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Senyu Fei, Siyin Wang, Junhao Shi, Zihao Dai, Jikun Cai, Pengfang Qian, Li Ji, X...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉-语言-动作模型的鲁棒性分析，属于多模态模型的评估范畴。虽然视觉-语言模型（VLM）与异构数据建模有概念关联，但该论文聚焦于动作规划和机器人控制领域的鲁棒性测试，与推荐系统、搜索或广告的核心技术需求关联度较低。</p>
        </div>

        <!-- Metadata row: icons and "|" separators are decorative, so they are hidden from assistive technology. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 14:51:36
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13626v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13626v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Visual-Language-Action (VLA) models report impressive success rates on robotic manipulation benchmarks, yet these results may mask fundamental weaknesses in robustness. We perform a systematic vulnerability analysis by introducing controlled perturbations across seven dimensions: objects layout, camera viewpoints, robot initial states, language instructions, light conditions, background textures and sensor noise. We comprehensively analyzed multiple state-of-the-art models and revealed consistent brittleness beneath apparent competence. Our analysis exposes critical weaknesses: models exhibit extreme sensitivity to perturbation factors, including camera viewpoints and robot initial states, with performance dropping from 95% to below 30% under modest perturbations. Surprisingly, models are largely insensitive to language variations, with further experiments revealing that models tend to ignore language instructions completely. Our findings challenge the assumption that high benchmark scores equate to true competency and highlight the need for evaluation practices that assess reliability under realistic variation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13363v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13363v1" target="_blank" rel="noopener noreferrer">
                D-SMART：通过动态结构化记忆与推理树增强大语言模型对话一致性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            D-SMART: Enhancing LLM Dialogue Consistency via Dynamic Structured Memory And Reasoning Tree
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiang Lei, Qin Li, Min Zhang, Min Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM对话一致性改进，属于对话系统的技术优化。虽然涉及记忆和推理机制，但核心应用场景是对话系统而非推荐/搜索/广告领域。动态结构化记忆技术理论上可能应用于用户行为序列建模，但论文标题未显示明确的推荐/搜索/广告应用连接。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T09:53:11">2025-10-15 09:53:11</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13363v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13363v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">68T50</span><span class="category-tag">68T30</span><span class="category-tag">I.2.7; I.2.4</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large Language Models (LLMs) often exhibit factual inconsistencies and logical decay in extended, multi-turn dialogues, a challenge stemming from their reliance on static, pre-trained knowledge and an inability to reason adaptively over the dialogue history. Prevailing mitigation strategies, such as Retrieval-Augmented Generation (RAG) and agentic working memories, improve information recall but still engage with fundamentally static knowledge sources and follow pre-defined single reasoning path. This hinders their ability to preserve factual and logical consistency of their responses in multi-turn dialogues while the context evolves over time. To address this issue, we propose D-SMART, a model-agnostic framework designed to maintain multi-turn dialogue consistency by enabling LLMs to build and reason over a dynamic, structured representation of the conversational context. This is achieved via two synergistic components: (1) a Dynamic Structured Memory (DSM), which incrementally constructs and maintains an authoritative, OWL-compliant knowledge graph of the conversation; and (2) a Reasoning Tree (RT), which executes inferences as an explicit and traceable multi-step search over the graph. As the popular-used quality score (judged by GPT-4) can overlook logical flaws, we introduce new NLI-based metrics to better measure multi-turn dialogue consistency. Comprehensive experiments on the MT-Bench-101 benchmark show that D-SMART significantly outperforms state-of-the-art baselines, elevating the dialogue consistency score by over 48% for both proprietary and open-source models, and notably improves the quality score of the latter by up to 10.1%.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13344v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13344v1" target="_blank" rel="noopener noreferrer">
                UniMoE-Audio：基于动态容量专家混合模型的统一语音与音乐生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UniMoE-Audio: Unified Speech and Music Generation with Dynamic-Capacity MoE
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhenyu Liu, Yunxin Li, Xuanyu Zhang, Qixun Teng, Shenyuan Jiang, Xinyu Chen, Hao...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注语音和音乐生成的特定领域应用，属于音频生成而非推荐系统、搜索或广告的核心技术。虽然MoE架构本身是Transformer的效率改进技术，但论文专注于音频生成这一与RecSys/Search/Ads无关的应用场景，缺乏明确的跨领域应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T09:30:25">2025-10-15 09:30:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13344v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13344v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.SD</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Recent advances in unified multimodal models indicate a clear trend towards comprehensive content generation. However, the auditory domain remains a significant challenge, with music and speech often developed in isolation, hindering progress towards universal audio synthesis. This separation stems from inherent task conflicts and severe data imbalances, which impede the development of a truly unified audio generation model. To address this challenge, we propose UniMoE-Audio, a unified speech and music generation model within a novel Dynamic-Capacity Mixture-of-Experts (MoE) framework. Architecturally, UniMoE-Audio introduces a Top-P routing strategy for dynamic expert number allocation, and a hybrid expert design comprising routed experts for domain-specific knowledge, shared experts for domain-agnostic features, and null experts for adaptive computation skipping. To tackle data imbalance, we introduce a three-stage training curriculum: 1) Independent Specialist Training leverages original datasets to instill domain-specific knowledge into each "proto-expert" without interference; 2) MoE Integration and Warmup incorporates these specialists into the UniMoE-Audio architecture, warming up the gate module and shared expert using a subset of balanced dataset; and 3) Synergistic Joint Training trains the entire model end-to-end on the fully balanced dataset, fostering enhanced cross-domain synergy. Extensive experiments show that UniMoE-Audio not only achieves state-of-the-art performance on major speech and music generation benchmarks, but also demonstrates superior synergistic learning, mitigating the performance degradation typically seen in naive joint training. Our findings highlight the substantial potential of specialized MoE architecture and curated training strategies in advancing the field of universal audio generation. Homepage: https://mukioxun.github.io/Uni-MoE-site/home.html
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13272v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13272v1" target="_blank" rel="noopener noreferrer">
                超越正确性：在检索增强生成中奖励忠实推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Beyond Correctness: Rewarding Faithful Reasoning in Retrieval-Augmented Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhichao Xu, Zongyu Wu, Yun Zhou, Aosong Feng, Kang Zhou, Sangmin Woo, Kiran Ramn...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注检索增强生成(RAG)中的忠实推理评估，这属于纯粹的LLM评估和基准测试范畴。虽然RAG技术在搜索系统中有应用，但该论文聚焦于推理忠实性的评估方法，而非搜索/推荐/广告系统的核心算法改进或架构创新，与当前关注点的相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T08:17:52">2025-10-15 08:17:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13272v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13272v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Inspired by the success of reinforcement learning (RL) in Large Language Model (LLM) training for domains like math and code, recent works have begun exploring how to train LLMs to use search engines more effectively as tools for retrieval-augmented generation. Although these methods achieve performance improvement across QA benchmarks, many prioritize final answer correctness while overlooking the quality of intermediate reasoning steps, which may lead to chain-of-thought unfaithfulness. In this paper, we first introduce a comprehensive evaluation framework for evaluating RL-based search agents, covering three distinct faithfulness metrics: information-think faithfulness, think-answer faithfulness, and think-search faithfulness. Our evaluations reveal that a prototypical RL-based search agent, Search-R1, has significant room for improvement in this regard. To foster faithful reasoning, we introduce VERITAS (Verifying Entailed Reasoning through Intermediate Traceability in Agentic Search), a novel framework that integrates fine-grained faithfulness rewards into the reinforcement learning process. Our experiments show that models trained with VERITAS not only significantly improve reasoning faithfulness, but also achieve comparable task performance across seven QA benchmarks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13143v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13143v1" target="_blank" rel="noopener noreferrer">
                稳定大语言模型集成：示例代表性与其多样性之间的相互作用
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Stable LLM Ensemble: Interaction between Example Representativeness and Diversity
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Junichiro Niimi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM集成方法中的示例选择策略，属于LLM技术的基础研究。虽然集成方法可能间接提升推荐或搜索系统的稳定性，但论文焦点更偏向通用的LLM优化而非直接面向RecSys/Search/Ads的应用场景，相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T04:49:23">2025-10-15 04:49:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13143v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13143v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Large language models (LLMs) have achieved remarkable results in wide range of domains. However, the accuracy and robustness of one-shot LLM predictions remain highly sensitive to the examples and the diversity among ensemble members. This study systematically investigates the effects of example representativeness (one-shot strategy) and output diversity (sampling temperature) on LLM ensemble performance. Two one-shot strategies are compared: centroid-based representative examples (proposed) and randomly sampled examples (baseline) and sampling temperature also is varied. The proposed approach with higher temperature setting significantly outperforms random selection by +7.6% (macro-F1) and -10.5% (RMSE). Furthermore, the proposed model exceeds 5-shot prompting by +21.1% (macro-F1) and -24.0% (RMSE). Our findings demonstrate that combining representative example selection with increased temperature provides the appropriate level of diversity to the ensemble. This work highlights the practical importance of both example selection and controlled diversity in designing effective one-shot LLM ensembles.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13103v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13103v1" target="_blank" rel="noopener noreferrer">
                ESI：通过语义保持干预实现大型语言模型的认知不确定性量化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ESI: Epistemic Uncertainty Quantification via Semantic-preserving Intervention for Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mingda Li, Xinyu Li, Weinan Zhang, Longxuan Ma
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的不确定性量化方法，属于LLM可信性和可靠性范畴。虽然不确定性估计在推荐系统中可能有应用价值（如置信度评分），但论文标题未明确指向RecSys/Search/Ads领域的特定应用，且更偏向通用LLM可靠性研究而非直接的应用创新。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T02:46:43">2025-10-15 02:46:43</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13103v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13103v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Uncertainty Quantification (UQ) is a promising approach to improve model reliability, yet quantifying the uncertainty of Large Language Models (LLMs) is non-trivial. In this work, we establish a connection between the uncertainty of LLMs and their invariance under semantic-preserving intervention from a causal perspective. Building on this foundation, we propose a novel grey-box uncertainty quantification method that measures the variation in model outputs before and after the semantic-preserving intervention. Through theoretical justification, we show that our method provides an effective estimate of epistemic uncertainty. Our extensive experiments, conducted across various LLMs and a variety of question-answering (QA) datasets, demonstrate that our method excels not only in terms of effectiveness but also in computational efficiency.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13774v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13774v1" target="_blank" rel="noopener noreferrer">
                UrbanFusion：用于鲁棒空间表示对比学习的随机多模态融合
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            UrbanFusion: Stochastic Multimodal Fusion for Contrastive Learning of Robust Spatial Representations
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dominik J. Mühlematter, Lin Che, Ye Hong, Martin Raubal, Nina Wiedemann
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文提出了一种多模态融合方法用于空间表示学习，这在一定程度上与'VLM类比用于异构数据'相关，因为它处理多模态数据。然而，该方法专注于城市空间表示，没有明确展示在推荐系统、搜索或广告中的应用潜力，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T17:26:24">2025-10-15 17:26:24</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13774v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13774v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Forecasting urban phenomena such as housing prices and public health indicators requires the effective integration of various geospatial data. Current methods primarily utilize task-specific models, while recent foundation models for spatial representations often support only limited modalities and lack multimodal fusion capabilities. To overcome these challenges, we present UrbanFusion, a Geo-Foundation Model (GeoFM) that features Stochastic Multimodal Fusion (SMF). The framework employs modality-specific encoders to process different types of inputs, including street view imagery, remote sensing data, cartographic maps, and points of interest (POIs) data. These multimodal inputs are integrated via a Transformer-based fusion module that learns unified representations. An extensive evaluation across 41 tasks in 56 cities worldwide demonstrates UrbanFusion's strong generalization and predictive performance compared to state-of-the-art GeoAI models. Specifically, it 1) outperforms prior foundation models on location-encoding, 2) allows multimodal input during inference, and 3) generalizes well to regions unseen during training. UrbanFusion can flexibly utilize any subset of available modalities for a given location during both pretraining and inference, enabling broad applicability across diverse data availability scenarios. All source code is available at https://github.com/DominikM198/UrbanFusion.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13714v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13714v1" target="_blank" rel="noopener noreferrer">
                Dedelayed：通过设备端校正消除远程推理延迟
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Dedelayed: Deleting remote inference delay via on-device correction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dan Jacobellis, Mateen Ulhaq, Fabien Racapé, Hyomin Choi, Neeraja J. Yadwadkar
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注于通过设备端校正来减少远程推理延迟，这属于边缘计算和推理优化的范畴。虽然延迟优化在推荐和广告系统中具有潜在价值，但论文标题未明确表明与LLM、Transformer架构或推荐系统核心技术的直接关联。设备端推理优化可能应用于移动推荐场景，但相关性较弱且不够具体。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T16:13:44">2025-10-15 16:13:44</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13714v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13714v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">eess.IV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Remote inference allows lightweight devices to leverage powerful cloud models. However, communication network latency makes predictions stale and unsuitable for real-time tasks. To address this, we introduce Dedelayed, a delay-corrective method that mitigates arbitrary remote inference delays, allowing the local device to produce low-latency outputs in real time. Our method employs a lightweight local model that processes the current frame and fuses in features that a heavyweight remote model computes from past frames. On video from the BDD100K driving dataset, Dedelayed improves semantic segmentation accuracy over the stronger of the local-only and remote-only baselines across all realistic communication network delays beyond 33 ms. Without incurring additional delay, it improves accuracy by 6.4 mIoU compared to fully local inference and 9.8 mIoU compared to remote inference, for a round-trip delay of 100 ms. The advantage grows under longer delays and higher-motion scenes, as delay-mitigated split inference sustains accuracy more effectively, providing clear advantages for real-time tasks that must remain aligned with the current world state.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Paper card for arXiv:2510.13675v1. Header row: Chinese title (links to alphaXiv) and score badge.
         Font Awesome glyphs and "|" separators are decorative, so they carry aria-hidden="true". -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13675v1" target="_blank" rel="noopener noreferrer">
                野外视觉与认知：基于对比学习的大规模知识图谱开放域视觉实体识别
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>

    <!-- Hidden by the page's `.collapsed-level-1 .paper-details` rule while the card is collapsed. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Seeing and Knowing in the Wild: Open-domain Visual Entity Recognition with Large-scale Knowledge Graphs via Contrastive Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hongkuan Zhou, Lavdim Halilaj, Sebastian Monka, Stefan Schmid, Yuqicheng Zhu, Ji...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉实体识别与知识图谱的结合，属于视觉-语言多模态领域，与VLM类比异构数据的理念有一定关联。然而，其核心应用场景偏向通用视觉识别而非推荐/搜索/广告中的异构数据处理，潜在应用有限，仅能间接启发多模态特征融合方法。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The source timestamp carries no time-zone offset. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T15:33:36">2025-10-15 15:33:36</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13675v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13675v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                Open-domain visual entity recognition aims to identify and link entities depicted in images to a vast and evolving set of real-world concepts, such as those found in Wikidata. Unlike conventional classification tasks with fixed label sets, it operates under open-set conditions, where most target entities are unseen during training and exhibit long-tail distributions. This makes the task inherently challenging due to limited supervision, high visual ambiguity, and the need for semantic disambiguation. In this work, we propose a Knowledge-guided Contrastive Learning (KnowCoL) framework that combines both images and text descriptions into a shared semantic space grounded by structured information from Wikidata. By abstracting visual and textual inputs to a conceptual level, the model leverages entity descriptions, type hierarchies, and relational context to support zero-shot entity recognition. We evaluate our approach on the OVEN benchmark, a large-scale open-domain visual recognition dataset with Wikidata IDs as the label space. Our experiments show that using visual, textual, and structured knowledge greatly improves accuracy, especially for rare and unseen entities. Our smallest model improves the accuracy on unseen entities by 10.5% compared to the state-of-the-art, despite being 35 times smaller.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13515v1" target="_blank" rel="noopener noreferrer">
                UniME-V2：作为评判者的多模态大语言模型用于通用多模态嵌入学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniME-V2: MLLM-as-a-Judge for Universal Multimodal Embedding Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tiancheng Gu, Kaicheng Yang, Kaichen Zhang, Xiang An, Ziyong Feng, Yueyi Zhang, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态大语言模型作为评判者用于嵌入学习，属于多模态领域的技术进展。虽然多模态嵌入技术可能应用于搜索中的跨模态检索，但论文标题未明确表明与推荐系统、搜索或广告的直接关联，且多模态大语言模型作为评判者的应用更偏向通用评估而非特定领域优化，因此相关性有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T13:07:00">2025-10-15 13:07:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13515v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13515v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Universal multimodal embedding models are foundational to various tasks. Existing approaches typically employ in-batch negative mining by measuring the similarity of query-candidate pairs. However, these methods often struggle to capture subtle semantic differences among candidates and lack diversity in negative samples. Moreover, the embeddings exhibit limited discriminative ability in distinguishing false and hard negatives. In this paper, we leverage the advanced understanding capabilities of MLLMs to enhance representation learning and present a novel Universal Multimodal Embedding (UniME-V2) model. Our approach first constructs a potential hard negative set through global retrieval. We then introduce the MLLM-as-a-Judge mechanism, which utilizes MLLMs to assess the semantic alignment of query-candidate pairs and generate soft semantic matching scores. These scores serve as a foundation for hard negative mining, mitigating the impact of false negatives and enabling the identification of diverse, high-quality hard negatives. Furthermore, the semantic matching scores are used as soft labels to mitigate the rigid one-to-one mapping constraint. By aligning the similarity matrix with the soft semantic matching score matrix, the model learns semantic distinctions among candidates, significantly enhancing its discriminative capacity. To further improve performance, we propose UniME-V2-Reranker, a reranking model trained on our mined hard negatives through a joint pairwise and listwise optimization approach. We conduct comprehensive experiments on the MMEB benchmark and multiple retrieval tasks, demonstrating that our method achieves state-of-the-art performance on average across all tasks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13432v1" target="_blank" rel="noopener noreferrer">
                CoDS：通过领域分离增强异构场景中的协同感知
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CoDS: Enhancing Collaborative Perception in Heterogeneous Scenarios via Domain Separation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yushan Han, Hui Zhang, Honglei Zhang, Chuntao Ding, Yuanzhouhan Cao, Yidong Li
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的协同感知，虽然涉及异构场景处理，但其核心应用场景是自动驾驶等视觉感知任务，而非推荐系统、搜索或广告。虽然领域分离技术可能对处理异构数据有启发，但缺乏明确的RecSys/Search/Ads应用连接。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:29:14">2025-10-15 11:29:14</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13432v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13432v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Collaborative perception has been proven to improve individual perception in autonomous driving through multi-agent interaction. Nevertheless, most methods often assume identical encoders for all agents, which does not hold true when these models are deployed in real-world applications. To realize collaborative perception in actual heterogeneous scenarios, existing methods usually align neighbor features to those of the ego vehicle, which is vulnerable to noise from domain gaps and thus fails to address feature discrepancies effectively. Moreover, they adopt transformer-based modules for domain adaptation, which causes the model inference inefficiency on mobile devices. To tackle these issues, we propose CoDS, a Collaborative perception method that leverages Domain Separation to address feature discrepancies in heterogeneous scenarios. The CoDS employs two feature alignment modules, i.e., Lightweight Spatial-Channel Resizer (LSCR) and Distribution Alignment via Domain Separation (DADS). Besides, it utilizes the Domain Alignment Mutual Information (DAMI) loss to ensure effective feature alignment. Specifically, the LSCR aligns the neighbor feature across spatial and channel dimensions using a lightweight convolutional layer. Subsequently, the DADS mitigates feature distribution discrepancy with encoder-specific and encoder-agnostic domain separation modules. The former removes domain-dependent information and the latter captures task-related information. During training, the DAMI loss maximizes the mutual information between aligned heterogeneous features to enhance the domain separation process. The CoDS employs a fully convolutional architecture, which ensures high inference efficiency. Extensive experiments demonstrate that the CoDS effectively mitigates feature discrepancies in heterogeneous scenarios and achieves a trade-off between detection accuracy and inference efficiency.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13316v1" target="_blank" rel="noopener noreferrer">
                视觉趣味性解码：GPT-4o如何反映人类兴趣
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Visual Interestingness Decoded: How GPT-4o Mirrors Human Interests
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fitim Abdullahu, Helmut Grabner
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文研究GPT-4o对人类视觉兴趣的反映，主要涉及视觉理解和人类兴趣建模，但缺乏明确的推荐系统、搜索或广告应用场景。虽然视觉语言模型技术可能间接支持多模态推荐，但论文焦点更偏向纯粹的视觉理解和心理学研究，与当前关注的核心领域相关性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:04:48">2025-10-15 09:04:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13316v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13316v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Our daily life is highly influenced by what we consume and see. Attracting and holding one's attention -- the definition of (visual) interestingness -- is essential. The rise of Large Multimodal Models (LMMs) trained on large-scale visual and textual data has demonstrated impressive capabilities. We explore these models' potential to understand to what extent the concepts of visual interestingness are captured and examine the alignment between human assessments and GPT-4o's, a leading LMM, predictions through comparative analysis. Our studies reveal partial alignment between humans and GPT-4o. It already captures the concept as best compared to state-of-the-art methods. Hence, this allows for the effective labeling of image pairs according to their (commonly) interestingness, which are used as training data to distill the knowledge into a learning-to-rank model. The insights pave the way for a deeper understanding of human interest.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13253v1" target="_blank" rel="noopener noreferrer">
                端到端多模态扩散Mamba
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            End-to-End Multi-Modal Diffusion Mamba
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chunhao Lu, Qiang Lu, Meichen Dong, Jake Luo
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文结合了扩散模型（多模态生成）和Mamba架构（状态空间模型），属于架构创新。虽然Mamba在序列建模效率方面有潜力，但该论文主要关注多模态扩散生成，与推荐/搜索/广告的核心排序任务关联较弱。其潜在应用可能在于多模态内容理解，但距离实际RecSys/Search/Ads应用较远。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T08:03:50">2025-10-15 08:03:50</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13253v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13253v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Current end-to-end multi-modal models utilize different encoders and decoders to process input and output information. This separation hinders the joint representation learning of various modalities. To unify multi-modal processing, we propose a novel architecture called MDM (Multi-modal Diffusion Mamba). MDM utilizes a Mamba-based multi-step selection diffusion model to progressively generate and refine modality-specific information through a unified variational autoencoder for both encoding and decoding. This innovative approach allows MDM to achieve superior performance when processing high-dimensional data, particularly in generating high-resolution images and extended text sequences simultaneously. Our evaluations in areas such as image generation, image captioning, visual question answering, text comprehension, and reasoning tasks demonstrate that MDM significantly outperforms existing end-to-end models (MonoFormer, LlamaGen, and Chameleon etc.) and competes effectively with SOTA models like GPT-4V, Gemini Pro, and Mistral. Our results validate MDM's effectiveness in unifying multi-modal processes while maintaining computational efficiency, establishing a new direction for end-to-end multi-modal architectures.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13251v1" target="_blank" rel="noopener noreferrer">
                映射信息流：揭示视频大语言模型中隐藏的信息传递路径
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Map the Flow: Revealing Hidden Pathways of Information in VideoLLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Minji Kim, Taekyung Kim, Bohyung Han
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频大语言模型中的信息流分析，属于视觉语言模型范畴。虽然提到了VLM，但焦点在于模型内部机制分析而非异构数据统一建模，与'VLM Analogy for Heterogeneous Data'的核心理念关联较弱。在推荐/搜索/广告领域的潜在应用有限，主要是模型诊断工具而非直接应用技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:59:06">2025-10-15 07:59:06</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13251v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13251v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video Large Language Models (VideoLLMs) extend the capabilities of vision-language models to spatiotemporal inputs, enabling tasks such as video question answering (VideoQA). Despite recent advances in VideoLLMs, their internal mechanisms on where and how they extract and propagate video and textual information remain less explored. In this study, we investigate the internal information flow of VideoLLMs using mechanistic interpretability techniques. Our analysis reveals consistent patterns across diverse VideoQA tasks: (1) temporal reasoning in VideoLLMs initiates with active cross-frame interactions in early-to-middle layers, (2) followed by progressive video-language integration in middle layers. This is facilitated by alignment between video representations and linguistic embeddings containing temporal concepts. (3) Upon completion of this integration, the model is ready to generate correct answers in middle-to-late layers. (4) Based on our analysis, we show that VideoLLMs can retain their VideoQA performance by selecting these effective information pathways while suppressing a substantial amount of attention edges, e.g., 58% in LLaVA-NeXT-7B-Video-FT. These findings provide a blueprint on how VideoLLMs perform temporal reasoning and offer practical insights for improving model interpretability and downstream generalization. Our project page with the source code is available at https://map-the-flow.github.io
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13234v1" target="_blank" rel="noopener noreferrer">
                UniVector：通过实例-几何交互的统一向量提取
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniVector: Unified Vector Extraction via Instance-Geometry Interaction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yinglong Yan, Jun Yue, Shaobo Xia, Hanmeng Sun, Tianxu Ying, Chengcheng Wu, Sifa...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了向量提取的统一方法，可能涉及特征表示学习，这与推荐系统中的嵌入学习有一定相关性。然而，标题中强调的“实例-几何交互”更偏向计算机视觉中的目标检测或实例分割，缺乏明确的推荐、搜索或广告应用场景的直接联系。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:39:25">2025-10-15 07:39:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13234v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13234v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vector extraction retrieves structured vector geometry from raster images, offering high-fidelity representation and broad applicability. Existing methods, however, are usually tailored to a single vector type (e.g., polygons, polylines, line segments), requiring separate models for different structures. This stems from treating instance attributes (category, structure) and geometric attributes (point coordinates, connections) independently, limiting the ability to capture complex structures. Inspired by the human brain's simultaneous use of semantic and spatial interactions in visual perception, we propose UniVector, a unified VE framework that leverages instance-geometry interaction to extract multiple vector types within a single model. UniVector encodes vectors as structured queries containing both instance- and geometry-level information, and iteratively updates them through an interaction module for cross-level context exchange. A dynamic shape constraint further refines global structures and key points. To benchmark multi-structure scenarios, we introduce the Multi-Vector dataset with diverse polygons, polylines, and line segments. Experiments show UniVector sets a new state of the art on both single- and multi-structure VE tasks. Code and dataset will be released at https://github.com/yyyyll0ss/UniVector.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13219v1" target="_blank" rel="noopener noreferrer">
                基于提示的大规模视觉模型适应方法综述
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <!-- Detail section. The page stylesheet hides .paper-details inside a .collapsed-level-1 card. English text is tagged lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Prompt-based Adaptation in Large-scale Vision Models: A Survey
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xi Xiao, Yunbei Zhang, Lin Zhao, Yiyang Liu, Xiaoying Liao, Zheda Mai, Xingjian ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文主要关注视觉模型的提示适应方法，属于纯粹的视觉领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然提示工程在LLM中有广泛应用，但本文聚焦于视觉模型而非语言模型，因此对当前关注领域的潜在应用价值有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:14:50">2025-10-15 07:14:50</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13219v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13219v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The TeX-style quotes from the source abstract are rendered as typographic quotes, since HTML shows backticks literally. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In computer vision, Visual Prompting (VP) and Visual Prompt Tuning (VPT) have recently emerged as lightweight and effective alternatives to full fine-tuning for adapting large-scale vision models within the “pretrain-then-finetune” paradigm. However, despite rapid progress, their conceptual boundaries remain blurred, as VP and VPT are frequently used interchangeably in current research, reflecting a lack of systematic distinction between these techniques and their respective applications. In this survey, we revisit the designs of VP and VPT from first principles, and conceptualize them within a unified framework termed Prompt-based Adaptation (PA). We provide a taxonomy that categorizes existing methods into learnable, generative, and non-learnable prompts, and further organizes them by injection granularity -- pixel-level and token-level. Beyond the core methodologies, we examine PA's integrations across diverse domains, including medical imaging, 3D point clouds, and vision-language tasks, as well as its role in test-time adaptation and trustworthy AI. We also summarize current benchmarks and identify key challenges and future directions. To the best of our knowledge, we are the first comprehensive survey dedicated to PA's methodologies and applications in light of their distinct characteristics. Our survey aims to provide a clear roadmap for researchers and practitioners in all area to understand and explore the evolving landscape of PA-related research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <!-- Card header: title link (alphaXiv) and score badge. The star glyph is decorative, so it is hidden from assistive technology. -->
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13105v1" target="_blank" rel="noopener noreferrer">
                EgoSocial：通过以自我为中心的社会交互感知基准测试全模态大语言模型的主动干预能力
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>3/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            EgoSocial: Benchmarking Proactive Intervention Ability of Omnimodal LLMs via Egocentric Social Interaction Perception
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xijun Wang, Tanay Sharma, Achin Kulshrestha, Abhimitra Meka, Aveek Purohit, Dine...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注全模态LLM在社交交互感知和主动干预方面的基准测试，这属于纯粹的LLM评估范畴，与我的核心关注点无关。虽然提到了'全模态'概念，但焦点是社交交互基准测试，而非推荐系统、搜索或广告领域的实际应用或架构创新。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-15 02:52:19
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.13105v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.13105v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    As AR/VR technologies become integral to daily life, there's a growing need for AI that understands human social dynamics from an egocentric perspective. However, current LLMs often lack the social awareness to discern when to intervene as AI assistant. This leads to constant, socially unaware responses that may disrupt natural conversation and negatively impact user focus. To address these limitations, we introduce EgoSocial, a large-scale egocentric dataset with 13,500 social video-question pairs, specifically designed to benchmark intervention in social interaction perception. We also present an in-depth analysis of current omnimodal LLMs (OLLMs) to assess their effectiveness in detecting diverse social contextual cues. Experiments show that OLLMs still struggle to detect the intervention timing (14.4% for Gemini 2.5 Pro). We also propose EgoSoD (EgoSocial Detection), an end-to-end method for robustly discerning social dynamics. Informed by our OLLM analysis, EgoSoD integrates multimodal contextual cues (e.g., audio and visual cues) into a social thinking graph, dynamically modeling participants and interactions. Our method proactively detects intervention timing and social interactions, precisely determining when to intervene. Our EgoSoD improves Phi-4 by 45.6% and Gemini 2.5 Pro by 9.9% on Intervention Timing performance, and improves Phi-4 by 20.4% and Gemini 2.5 Pro by 6.9% on overall Social Interaction performance. We will release the dataset and code soon.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13804v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13804v1" target="_blank" rel="noopener noreferrer">
                生成式通用验证器作为多模态元推理器
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Generative Universal Verifier as Multimodal Meta-Reasoner
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinchen Zhang, Xiaoying Zhang, Youbin Wu, Yanbin Cao, Renrui Zhang, Ruihang Chu,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了多模态推理和验证能力，这可能与VLM类比处理异构数据的概念有微弱联系。然而，它更侧重于通用的多模态推理和验证，而非专门针对推荐系统、搜索或广告领域的应用，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:59:24">2025-10-15 17:59:24</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13804v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13804v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce Generative Universal Verifier, a novel concept and plugin designed for next-generation multimodal reasoning in vision-language models and unified multimodal models, providing the fundamental capability of reflection and refinement on visual outcomes during the reasoning and generation process. This work makes three main contributions: (1) We build ViVerBench, a comprehensive benchmark spanning 16 categories of critical tasks for evaluating visual outcomes in multimodal reasoning. Results show that existing VLMs consistently underperform across these tasks, underscoring a substantial gap from human-level capability in reliable visual verification. (2) We design two automated pipelines to construct large-scale visual verification data and train OmniVerifier-7B, the first omni-capable generative verifier trained for universal visual verification and achieves notable gains on ViVerBench(+8.3). Through training, we identify three atomic capabilities in visual verification and demonstrate how they generalize and interact synergistically. (3) We propose OmniVerifier-TTS, a sequential test-time scaling paradigm that leverages the universal verifier to bridge image generation and editing within unified models, enhancing the upper bound of generative ability through iterative fine-grained optimization. Beyond generation, we extend universal verifier to broader world-modeling interleaved reasoning scenarios. Empirically, OmniVerifier-TTS achieves improvements on T2I-ReasonBench(+3.7), and GenEval++(+4.3), outperforming existing parallel test-time scaling methods, such as Best-of-N. By endowing multimodal reasoning with reliable visual verification, OmniVerifier advances both reliable reflection during generation and scalable test-time refinement, marking a step toward more trustworthy and controllable next-generation reasoning systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13749v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13749v1" target="_blank" rel="noopener noreferrer">
                评估聊天助手中的网页搜索可信度与回答真实性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Assessing Web Search Credibility and Response Groundedness in Chat Assistants
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ivan Vykopal, Matúš Pikuliak, Simon Ostermann, Marián Šimko
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注搜索可信度和回答真实性评估，这属于纯粹的NLP评估基准范畴，与我的核心关注点不相关。虽然涉及搜索领域，但焦点是评估指标而非核心推荐系统、搜索或广告的技术进步或应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:55:47">2025-10-15 16:55:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13749v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13749v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Chat assistants increasingly integrate web search functionality, enabling them to retrieve and cite external sources. While this promises more reliable answers, it also raises the risk of amplifying misinformation from low-credibility sources. In this paper, we introduce a novel methodology for evaluating assistants' web search behavior, focusing on source credibility and the groundedness of responses with respect to cited sources. Using 100 claims across five misinformation-prone topics, we assess GPT-4o, GPT-5, Perplexity, and Qwen Chat. Our findings reveal differences between the assistants, with Perplexity achieving the highest source credibility, whereas GPT-4o exhibits elevated citation of non-credibility sources on sensitive topics. This work provides the first systematic comparison of commonly used chat assistants for fact-checking behavior, offering a foundation for evaluating AI systems in high-stakes information environments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13632v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13632v1" target="_blank" rel="noopener noreferrer">
                弥合大型语言模型中文本与语音理解之间的差距
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Closing the Gap Between Text and Speech Understanding in LLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Santiago Cuervo, Skyler Seto, Maureen de Seyssel, Richard He Bai, Zijin Gu, Tati...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注文本与语音模态的融合理解，这属于多模态LLM范畴，但语音理解在推荐系统、搜索或广告中的直接应用场景非常有限。虽然多模态技术可能间接启发异构数据处理，但论文标题明确聚焦于语音这一与当前关注点相关性较低的模态。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:57:16">2025-10-15 14:57:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13632v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13632v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">eess.AS</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) can be adapted to extend their text capabilities to speech inputs. However, these speech-adapted LLMs consistently underperform their text-based counterparts--and even cascaded pipelines--on language understanding tasks. We term this shortfall the text-speech understanding gap: the performance drop observed when a speech-adapted LLM processes spoken inputs relative to when the original text-based LLM processes the equivalent text. Recent approaches to narrowing this gap either rely on large-scale speech synthesis of text corpora, which is costly and heavily dependent on synthetic data, or on large-scale proprietary speech datasets, which are not reproducible. As a result, there remains a need for more data-efficient alternatives for closing the text-speech understanding gap. In this work, we analyze the gap as driven by two factors: (i) forgetting of text capabilities during adaptation, and (ii) cross-modal misalignment between speech and text. Based on this analysis, we introduce SALAD--Sample-efficient Alignment with Learning through Active selection and cross-modal Distillation--which combines cross-modal distillation with targeted synthetic data to improve alignment while mitigating forgetting. Applied to 3B and 7B LLMs, SALAD achieves competitive performance with a strong open-weight model across broad-domain benchmarks in knowledge, language understanding, and reasoning, while training on over an order of magnitude less speech data from public corpora.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13624v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13624v1" target="_blank" rel="noopener noreferrer">
                解锁公共目录：为德国肿瘤诊断ICD编码的指令微调大语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Unlocking Public Catalogues: Instruction-Tuning LLMs for ICD Coding of German Tumor Diagnoses
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Stefan Lenz, Lakisha Ortiz Rosario, Georg Vollmar, Arsenij Ustjanzew, Fatma Alic...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医疗领域的ICD编码任务，属于医学信息学应用，与推荐系统、搜索或广告的核心领域无关。虽然涉及LLM指令微调技术，但该技术本身是通用的，论文并未探讨其在RecSys/Search/Ads中的潜在应用，因此相关性很低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:51:28">2025-10-15 14:51:28</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13624v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13624v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Accurate coding of tumor diagnoses with ICD-10-GM and ICD-O-3 is essential for structured cancer documentation in Germany. Smaller open-weight LLMs are appealing for privacy-preserving automation but often struggle with coding accuracy in German-language contexts. This study investigates whether instruction-based fine-tuning on public datasets improves the coding accuracy of open-weight LLMs for German tumor diagnosis texts. The evaluation uses coded diagnoses from the local tumor documentation system as test data. In a systematic data quality assessment, the upper limit for ICD-10 coding performance was estimated at 60-79% for exact and 81-94% for partial (three-character codes only) derivation. As training data, over 500,000 question-answer pairs were created based on the ICD-10-GM, ICD-O-3, and OPS catalogues. Eight open-weight models from the Qwen, Llama, and Mistral families (7-70 B parameters) were fine-tuned. ICD-10-GM accuracy rose from 1.4-24% to 41-58%, and partial accuracy from 31-74% to 73-83%. The accuracy of ICD-O-3 topography coding also improved but started and remained considerably lower with an exact accuracy of 22-40% and a partial accuracy of 56-67% after fine-tuning. Malformed code outputs dropped to 0% for all models. Tumor-diagnosis recognition reached 99%. Accuracy correlated positively with model size, but gaps between small and large models narrowed after fine-tuning. The reasoning mode in Qwen3 generally yielded a lower performance than fine-tuning and was over 100 times slower. Our findings highlight the potential of leveraging public catalogues to build instruction datasets that improve LLMs in medical documentation tasks. The complete training dataset and the best-performing checkpoints of the fine-tuned models are available from https://huggingface.co/datasets/stefan-m-lenz/ICDOPS-QA-2024.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13586v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13586v1" target="_blank" rel="noopener noreferrer">
                游戏对话去扁平化：在基于大语言模型的非玩家角色中平衡角色真实性与任务执行
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Deflanderization for Game Dialogue: Balancing Character Authenticity with Task Execution in LLM-based NPCs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Pasin Buakhaw, Kun Kerdthaisong, Phuree Phenhiran, Pitikorn Khlaisamniang, Supas...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注游戏领域中NPC对话系统的具体应用，属于领域特定的对话生成问题。虽然涉及LLM技术，但其应用场景（游戏NPC）与推荐系统、搜索或广告领域没有直接关联，也不涉及核心推荐算法、检索技术或广告排序等关键技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:17:23">2025-10-15 14:17:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13586v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13586v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. Line-break hyphenation and missing-space artifacts in the abstract text have been repaired. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The emergence of large language models (LLMs) has opened new opportunities for creating dynamic non-player characters (NPCs) in gaming environments, enabling both functional task execution and persona-consistent dialogue generation. In this paper, we (Tu_Character_lab) report our participation in the Commonsense Persona-Grounded Dialogue Challenge (CPDC) 2025 Round 2, which evaluates agents across three tracks: task-oriented dialogue, context-aware dialogue, and their integration. Our approach combines two complementary strategies: (i) lightweight prompting techniques in the API track, including a Deflanderization prompting method to suppress excessive role-play and improve task fidelity, and (ii) fine-tuned large models in the GPU track, leveraging Qwen3-14B with supervised fine-tuning (SFT) and Low-Rank Adaptation (LoRA). Our best submissions ranked 2nd on Task 1, 2nd on Task 3 (API track), and 4th on Task 3 (GPU track).
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13580v1). The page's inline CSS hides .paper-details while the card has collapsed-level-1. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13580v1" target="_blank" rel="noopener noreferrer">
                大型语言模型中低资源语言的稀疏子网络增强
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English passages carry lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Sparse Subnetwork Enhancement for Underrepresented Languages in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Daniil Gurgurov, Josef van Genabith, Simon Ostermann
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多语言能力增强和低资源语言处理，属于LLM能力扩展的范畴。虽然稀疏子网络技术本身可能对模型效率有改进，但论文聚焦于语言多样性而非推荐/搜索/广告的核心问题，潜在应用场景不明确且间接。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative and hidden from assistive technology; datetime has no zone offset because the displayed timestamp states none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:14:49">2025-10-15 14:14:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13580v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13580v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Full abstract behind a native disclosure widget. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models exhibit uneven performance across languages, with substantial gaps between high- and low-resource languages. We present a framework for enhancing monolingual capabilities of LLMs in underrepresented languages while preserving their general-purpose performance through targeted fine-tuning of language-specific subnetworks. Our approach identifies language-specific neurons using Language Activation Probability Entropy and fine-tunes only the weights associated with these neurons, a dedicated subnetwork, on target-language data. Experiments on Llama-3.1-8B and Mistral-Nemo-12B across 12 mid- and low-resource languages demonstrate that our method consistently outperforms full fine-tuning, FFN-only fine-tuning, LoRA adaptation, and random subset fine-tuning baselines while efficiently updating only up to 1% of model parameters. Beyond performance improvements, we observe enhanced favorable training dynamics, cross-lingual representational alignment, and systematic weight update changes. To facilitate future research, we release language-specific neuron identifications for over 100 languages as well as our adaptation pipeline, offering a cost-effective pathway for adapting state-of-the-art models to underrepresented languages.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting; please set `customMade` and open the koroFileHeader configuration to adjust it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13499v1" target="_blank" rel="noopener noreferrer">
                ConsintBench：在真实世界消费者意图理解上评估语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>2/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            ConsintBench: Evaluating Language Models on Real-World Consumer Intent Understanding
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Xiaozhe Li, TianYi Lyu, Siyi Yang, Yuxi Gong, Yizhao Yang, Jinxuan Huang, Ligao ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然消费者意图理解与搜索和推荐系统相关，但该论文主要关注语言模型的评估基准，这属于纯粹的NLP评估范畴。根据指导原则，评估基准、幻觉和纯粹NLP中心主题被视为不相关主题，因此该论文与当前关注点相关性较低。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-15 12:49:45
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.13499v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.13499v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Understanding human intent is a complex, high-level task for large language models (LLMs), requiring analytical reasoning, contextual interpretation, dynamic information aggregation, and decision-making under uncertainty. Real-world public discussions, such as consumer product discussions, are rarely linear or involve a single user. Instead, they are characterized by interwoven and often conflicting perspectives, divergent concerns, goals, emotional tendencies, as well as implicit assumptions and background knowledge about usage scenarios. To accurately understand such explicit public intent, an LLM must go beyond parsing individual sentences; it must integrate multi-source signals, reason over inconsistencies, and adapt to evolving discourse, similar to how experts in fields like politics, economics, or finance approach complex, uncertain environments. Despite the importance of this capability, no large-scale benchmark currently exists for evaluating LLMs on real-world human intent understanding, primarily due to the challenges of collecting real-world public discussion data and constructing a robust evaluation pipeline. To bridge this gap, we introduce \bench, the first dynamic, live evaluation benchmark specifically designed for intent understanding, particularly in the consumer domain. \bench is the largest and most diverse benchmark of its kind, supporting real-time updates while preventing data contamination through an automated curation pipeline.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13494v1" target="_blank" rel="noopener noreferrer">
                LiteraryQA：面向长文档叙事问答的有效评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LiteraryQA: Towards Effective Evaluation of Long-document Narrative QA
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tommaso Bonomo, Luca Gioffré, Roberto Navigli
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于长文档问答的评估基准，属于纯粹的NLP评估主题，与推荐系统、搜索或广告的核心技术无关。虽然问答系统与搜索有一定关联，但该论文明确针对文学叙事领域的专业评估，缺乏在推荐、搜索或广告领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 12:43:59
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13494v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13494v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Question Answering (QA) on narrative text poses a unique challenge to current systems, requiring a deep understanding of long, complex documents. However, the reliability of NarrativeQA, the most widely used benchmark in this domain, is hindered by noisy documents and flawed QA pairs. In this work, we introduce LiteraryQA, a high-quality subset of NarrativeQA focused on literary works. Using a human- and LLM-validated pipeline, we identify and correct low-quality QA samples while removing extraneous text from source documents. We then carry out a meta-evaluation of automatic metrics to clarify how systems should be evaluated on LiteraryQA. This analysis reveals that all n-gram-based metrics have a low system-level correlation to human judgment, while LLM-as-a-Judge evaluations, even with small open-weight models, can strongly agree with the ranking identified by humans. Finally, we benchmark a set of long-context LLMs on LiteraryQA. We release our code and data at https://github.com/SapienzaNLP/LiteraryQA.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13434v1" target="_blank" rel="noopener noreferrer">
                超越单一奖励：面向机器翻译的多配对、多视角偏好优化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Single-Reward: Multi-Pair, Multi-Perspective Preference Optimization for Machine Translation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hao Wang, Linlong Xu, Heng Liu, Yangyang Liu, Xiaohu Zhao, Bo Zeng, Liangying Sh...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器翻译领域的偏好优化方法，虽然涉及多奖励和多视角优化，但核心应用场景是机器翻译这一特定NLP任务。对于推荐系统、搜索或广告领域，该方法可能缺乏直接的适用性，因为翻译任务的优化目标和特征与推荐/搜索的排序优化有本质差异。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 11:30:49
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13434v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13434v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Direct Preference Optimization (DPO) is a powerful paradigm for aligning Large Language Models (LLMs) to human preferences in Machine Translation (MT), but current methods are hindered by two fundamental challenges: (1) flawed reward signals from Quality Estimation (QE) models that overlook critical errors like translation hallucination, and (2) inefficient data utilization that discards valuable learning signals by selecting only a single win-loss pair. To address these limitations, we introduce M^2PO: Multi-Pair, Multi-Perspective Preference Optimization. Our framework integrates a multi-perspective reward engine that creates a more robust signal by combining two key viewpoints: a new hallucination penalty for factuality, and an innovative dynamic quality score that adaptively fuses external evaluations with the model's own evolving judgment. This is synergistically paired with a multi-pair construction strategy that systematically creates a comprehensive set of preference pairs from the entire pool of translation candidates. This synergistic approach ensures the model learns from a richer spectrum of quality trade-offs, leading to more robust and faithful translations. On challenging WMT21-22 benchmarks, M^2PO substantially outperforms existing preference optimization methods and demonstrates highly competitive performance against leading proprietary LLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13407v1" target="_blank" rel="noopener noreferrer">
                通过跨语言共词化模式研究词汇演变
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Investigating Lexical Change through Cross-Linguistic Colexification Patterns
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kim Gfeller, Sabine Stoll, Chundra Cathcart, Paul Widmer
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究语言学中的词汇演变和共词化模式，属于纯粹的语言学领域。虽然涉及语言模式分析，但缺乏与推荐系统、搜索或广告领域的直接关联，也没有展示出在Transformer架构、LLM技术或异构数据处理方面的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 11:04:28
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13407v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13407v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                One of the most intriguing features of language is its constant change, with ongoing shifts in how meaning is expressed. Despite decades of research, the factors that determine how and why meanings evolve remain only partly understood. Colexification -- the phenomenon of expressing multiple distinct concepts using the same word form -- serves as a valuable window onto the dynamics of meaning change across languages. Here, we apply phylogenetic comparative models to dictionary data from three language families, Austronesian, Indo-European, and Uralic, in order to shed light on the evolutionary dynamics underlying the colexification of concept pairs. We assess the effects of three predictors: associativity, borrowability, and usage frequency. Our results show that more closely related concept pairs are colexified across a larger portion of the family tree and exhibit slower rates of change. In contrast, concept pairs that are more frequent and more prone to borrowing tend to change more rapidly and are less often colexified. We also find considerable differences between the language families under study, suggesting that areal and cultural factors may play a role.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13395v1" target="_blank" rel="noopener noreferrer">
                用语言做事：重新思考大型语言模型中的心智理论模拟
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Doing Things with Words: Rethinking Theory of Mind Simulation in Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Agnese Lombardi, Alessandro Lenci
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文主要关注LLM中的心智理论（Theory of Mind）模拟，这是一个纯粹的认知科学和心理学导向的NLP研究主题。虽然心智理论可能对理解用户意图有间接帮助，但该论文没有展示明确的推荐系统、搜索或广告应用潜力，且更偏向理论心理学而非实际系统改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 10:48:31
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13395v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13395v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Language is fundamental to human cooperation, facilitating not only the exchange of information but also the coordination of actions through shared interpretations of situational contexts. This study explores whether the Generative Agent-Based Model (GABM) Concordia can effectively model Theory of Mind (ToM) within simulated real-world environments. Specifically, we assess whether this framework successfully simulates ToM abilities and whether GPT-4 can perform tasks by making genuine inferences from social context, rather than relying on linguistic memorization. Our findings reveal a critical limitation: GPT-4 frequently fails to select actions based on belief attribution, suggesting that apparent ToM-like abilities observed in previous studies may stem from shallow statistical associations rather than true reasoning. Additionally, the model struggles to generate coherent causal effects from agent actions, exposing difficulties in processing complex social interactions. These results challenge current statements about emergent ToM-like capabilities in LLMs and highlight the need for more rigorous, action-based evaluation frameworks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13387v1" target="_blank" rel="noopener noreferrer">
                提出无法拒绝的报价：在无需预先承诺的真实世界对话中实现贝叶斯劝说
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Make an Offer They Can't Refuse: Grounding Bayesian Persuasion in Real-World Dialogues without Pre-Commitment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Buwei He, Yang Liu, Zhaowei Zhang, Zixia Jia, Huijia Wu, Zhaofeng He, Zilong Zhe...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究贝叶斯劝说理论在对话系统中的应用，属于对话AI和博弈论领域。虽然对话系统可能与搜索推荐有一定关联，但论文聚焦于劝说机制而非核心的推荐排序、检索或广告技术，与当前关注的核心领域进展和LLM技术应用相关性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 10:26:02
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13387v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13387v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.GT</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Persuasion, a fundamental social capability for humans, remains a challenge for AI systems such as large language models (LLMs). Current studies often overlook the strategic use of information asymmetry in message design or rely on strong assumptions regarding pre-commitment. In this work, we explore the application of Bayesian Persuasion (BP) in natural language within single-turn dialogue settings, to enhance the strategic persuasion capabilities of LLMs. Our framework incorporates a commitment-communication mechanism, where the persuader explicitly outlines an information schema by narrating their potential types (e.g., honest or dishonest), thereby guiding the persuadee in performing the intended Bayesian belief update. We evaluate two variants of our approach: Semi-Formal-Natural-Language (SFNL) BP and Fully-Natural-Language (FNL) BP, benchmarking them against both naive and strong non-BP (NBP) baselines within a comprehensive evaluation framework. This framework covers a diverse set of persuadees -- including LLM instances with varying prompts and fine-tuning and human participants -- across tasks ranging from specially designed persuasion scenarios to general everyday situations. Experimental results on LLM-based agents reveal three main findings: (1) LLMs guided by BP strategies consistently achieve higher persuasion success rates than NBP baselines; (2) SFNL exhibits greater credibility and logical coherence, while FNL shows stronger emotional resonance and robustness in naturalistic conversations; (3) with supervised fine-tuning, smaller models can attain BP performance comparable to that of larger models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13351v1" target="_blank" rel="noopener noreferrer">
                Protect：面向可信企业级大语言模型系统的鲁棒护栏技术栈
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Protect: Towards Robust Guardrailing Stack for Trustworthy Enterprise LLM Systems
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Karthik Avinash, Nikhil Pareek, Rishav Hada
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注企业级LLM系统的护栏技术和可信性保障，这属于安全和可靠性范畴，而非核心推荐系统、搜索或广告的技术进展。虽然提到了LLM系统，但其焦点是防护机制而非LLM在推荐/搜索/广告中的直接应用或架构创新，因此与当前关注点相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 09:40:24
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13351v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13351v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The increasing deployment of Large Language Models (LLMs) across enterprise and mission-critical domains has underscored the urgent need for robust guardrailing systems that ensure safety, reliability, and compliance. Existing solutions often struggle with real-time oversight, multi-modal data handling, and explainability -- limitations that hinder their adoption in regulated environments. Existing guardrails largely operate in isolation, focused on text alone making them inadequate for multi-modal, production-scale environments. We introduce Protect, natively multi-modal guardrailing model designed to operate seamlessly across text, image, and audio inputs, designed for enterprise-grade deployment. Protect integrates fine-tuned, category-specific adapters trained via Low-Rank Adaptation (LoRA) on an extensive, multi-modal dataset covering four safety dimensions: toxicity, sexism, data privacy, and prompt injection. Our teacher-assisted annotation pipeline leverages reasoning and explanation traces to generate high-fidelity, context-aware labels across modalities. Experimental results demonstrate state-of-the-art performance across all safety dimensions, surpassing existing open and proprietary models such as WildGuard, LlamaGuard-4, and GPT-4.1. Protect establishes a strong foundation for trustworthy, auditable, and production-ready safety systems capable of operating across text, image, and audio modalities.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13302v1" target="_blank" rel="noopener noreferrer">
                用于作者归属与验证的LLM单样本风格迁移
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LLM one-shot style transfer for Authorship Attribution and Verification
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Pablo Miralles-González, Javier Huertas-Tato, Alejandro Martín, David Camacho
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM在文本风格迁移和作者识别方面的应用，这属于纯粹的NLP任务。虽然涉及LLM技术，但作者归属和验证与推荐系统、搜索或广告的核心领域没有直接关联，也没有明显的潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 08:43:24
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13302v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13302v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Computational stylometry analyzes writing style through quantitative patterns in text, supporting applications from forensic tasks such as identity linking and plagiarism detection to literary attribution in the humanities. Supervised and contrastive approaches rely on data with spurious correlations and often confuse style with topic. Despite their natural use in AI-generated text detection, the CLM pre-training of modern LLMs has been scarcely leveraged for general authorship problems. We propose a novel unsupervised approach based on this extensive pre-training and the in-context learning capabilities of LLMs, employing the log-probabilities of an LLM to measure style transferability from one text to another. Our method significantly outperforms LLM prompting approaches of comparable scale and achieves higher accuracy than contrastively trained baselines when controlling for topical correlations. Moreover, performance scales fairly consistently with the size of the base model and, in the case of authorship verification, with an additional mechanism that increases test-time computation; enabling flexible trade-offs between computational cost and accuracy.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: This is the default setting. Please set `customMade`; open koroFileHeader to view its configuration and set it up: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13276v1" target="_blank" rel="noopener noreferrer">
                MMLongCite：用于评估长上下文视觉语言模型保真度的基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden by the `.collapsed-level-1 .paper-details` rule in the page's inline stylesheet. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the page-level zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MMLongCite: A Benchmark for Evaluating Fidelity of Long-Context Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Keyan Zhou, Zecheng Tang, Lingfeng Ming, Guanghao Zhou, Qiguang Chen, Dan Qiao, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉语言模型的评估基准，属于纯粹的VLM评估范畴，与推荐系统、搜索或广告的核心技术进展无关。虽然提到了长上下文处理，但缺乏明确的机制说明或在这些领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 08:22:03
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13276v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13276v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid advancement of large vision language models (LVLMs) has led to a significant expansion of their context windows. However, an extended context window does not guarantee the effective utilization of the context, posing a critical challenge for real-world applications. Current evaluations of such long-context faithfulness are predominantly focused on the text-only domain, while multimodal assessments remain limited to short contexts. To bridge this gap, we introduce MMLongCite, a comprehensive benchmark designed to evaluate the fidelity of LVLMs in long-context scenarios. MMLongCite comprises 8 distinct tasks spanning 6 context length intervals and incorporates diverse modalities, including text, images, and videos. Our evaluation of state-of-the-art LVLMs reveals their limited faithfulness in handling long multimodal contexts. Furthermore, we provide an in-depth analysis of how context length and the position of crucial content affect the faithfulness of these models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13271v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13271v1" target="_blank" rel="noopener noreferrer">
                你理解提示了吗？在棋盘游戏概念上对大型语言模型进行基准测试
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Do You Get the Hint? Benchmarking LLMs on the Board Game Concept
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ine Gevers, Walter Daelemans
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注在棋盘游戏上对LLM进行基准测试，这属于纯粹的LLM评估和基准测试范畴，与我的核心关注点无关。虽然涉及LLM，但没有展示在推荐系统、搜索或广告中的潜在应用，并且基准测试属于被排除的纯粹NLP中心主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T08:17:25">2025-10-15 08:17:25</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13271v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13271v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language models (LLMs) have achieved striking successes on many benchmarks, yet recent studies continue to expose fundamental weaknesses. In particular, tasks that require abstract reasoning remain challenging, often because they use representations such as grids, symbols, or visual patterns that differ from the natural language data LLMs are trained on. In this paper, we introduce Concept, a simple word-guessing board game, as a benchmark for probing abductive reasoning in a representation that is much closer to LLM pre-training data: natural language. Our results show that this game, easily solved by humans (with a success rate of over 90%), is still very challenging for state-of-the-art LLMs (no model exceeds 40% success rate). Specifically, we observe that LLMs struggle with interpreting other players' strategic intents, and with correcting initial hypotheses given sequential information updates. In addition, we extend the evaluation across multiple languages, and find that the LLM performance drops further in lower-resource languages (Dutch, French, and Spanish) compared to English.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13255v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13255v1" target="_blank" rel="noopener noreferrer">
                层次频率标记探针（HFTP）：一种研究大型语言模型和人脑中句法结构表征的统一方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Hierarchical Frequency Tagging Probe (HFTP): A Unified Approach to Investigate Syntactic Structure Representations in Large Language Models and the Human Brain
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingmin An, Yilong Song, Ruolin Yang, Nai Ding, Lingxi Lu, Yuxuan Wang, Wei Wang...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM中句法结构的神经表征研究，这属于纯NLP基础研究范畴。虽然涉及LLM内部表征分析，但聚焦于句法结构这种语言特异性问题，缺乏明确的推荐系统、搜索或广告应用前景。论文更像是探索LLM与人脑认知机制的对比研究，而非开发可应用于实际业务场景的技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T08:04:49">2025-10-15 08:04:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13255v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13255v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.NE</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Language Models (LLMs) demonstrate human-level or even superior language abilities, effectively modeling syntactic structures, yet the specific computational modules responsible remain unclear. A key question is whether LLM behavioral capabilities stem from mechanisms akin to those in the human brain. To address these questions, we introduce the Hierarchical Frequency Tagging Probe (HFTP), a tool that utilizes frequency-domain analysis to identify neuron-wise components of LLMs (e.g., individual Multilayer Perceptron (MLP) neurons) and cortical regions (via intracranial recordings) encoding syntactic structures. Our results show that models such as GPT-2, Gemma, Gemma 2, Llama 2, Llama 3.1, and GLM-4 process syntax in analogous layers, while the human brain relies on distinct cortical regions for different syntactic levels. Representational similarity analysis reveals a stronger alignment between LLM representations and the left hemisphere of the brain (dominant in language processing). Notably, upgraded models exhibit divergent trends: Gemma 2 shows greater brain similarity than Gemma, while Llama 3.1 shows less alignment with the brain compared to Llama 2. These findings offer new insights into the interpretability of LLM behavioral improvements, raising questions about whether these advancements are driven by human-like or non-human-like mechanisms, and establish HFTP as a valuable tool bridging computational linguistics and cognitive neuroscience. This project is available at https://github.com/LilTiger/HFTP.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13220v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13220v1" target="_blank" rel="noopener noreferrer">
                EvoTest：用于自改进智能体系统的进化式测试时学习
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            EvoTest: Evolutionary Test-Time Learning for Self-Improving Agentic Systems
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yufei He, Juncheng Liu, Yue Liu, Yibo Li, Tri Cao, Zhiyuan Hu, Xinxing Xu, Bryan...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注智能体系统的测试时学习和自改进能力，这属于通用AI智能体技术，与推荐系统、搜索或广告的核心技术领域关联较弱。虽然测试时学习概念在理论上可能应用于在线学习场景，但论文标题未表明任何具体的RecSys/Search/Ads应用或相关技术组件。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:16:28">2025-10-15 07:16:28</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13220v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13220v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                A fundamental limitation of current AI agents is their inability to learn complex skills on the fly at test time, often behaving like "clever but clueless interns" in novel environments. This severely limits their practical utility. To systematically measure and drive progress on this challenge, we first introduce the Jericho Test-Time Learning (J-TTL) benchmark. J-TTL is a new evaluation setup where an agent must play the same game for several consecutive episodes, attempting to improve its performance from one episode to the next. On J-TTL, we find that existing adaptation methods like reflection, memory, or reinforcement learning struggle. To address the challenges posed by our benchmark, we present EvoTest, an evolutionary test-time learning framework that improves an agent without any fine-tuning or gradients—by evolving the entire agentic system after every episode. EvoTest has two roles: the Actor Agent, which plays the game, and the Evolver Agent, which analyzes the episode transcript to propose a revised configuration for the next run. This configuration rewrites the prompt, updates memory by logging effective state-action choices, tunes hyperparameters, and learns the tool-use routines. On our J-TTL benchmark, EvoTest consistently increases performance, outperforming not only reflection and memory-only baselines but also more complex online fine-tuning methods. Notably, our method is the only one capable of winning two games (Detective and Library), while all baselines fail to win any.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13215v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13215v1" target="_blank" rel="noopener noreferrer">
                基于目标驱动学习者状态建模的个性化学习路径规划
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Personalized Learning Path Planning with Goal-Driven Learner State Modeling
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Joy Jia Yin Lim, Ye He, Jifan Yu, Xin Cong, Daniel Zhang-Li, Zhiyuan Liu, Huiqin...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注教育领域的个性化学习路径规划，虽然涉及个性化推荐概念，但属于教育技术领域而非推荐系统/搜索/广告的核心范畴。论文的技术方法（目标驱动状态建模）可能对推荐系统有启发，但这种应用过于间接且非核心关注点。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:59:49">2025-10-15 06:59:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13215v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13215v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Personalized Learning Path Planning (PLPP) aims to design adaptive learning paths that align with individual goals. While large language models (LLMs) show potential in personalizing learning experiences, existing approaches often lack mechanisms for goal-aligned planning. We introduce Pxplore, a novel framework for PLPP that integrates a reinforcement-based training paradigm and an LLM-driven educational architecture. We design a structured learner state model and an automated reward function that transforms abstract objectives into computable signals. We train the policy combining supervised fine-tuning (SFT) and Group Relative Policy Optimization (GRPO), and deploy it within a real-world learning platform. Extensive experiments validate Pxplore's effectiveness in producing coherent, personalized, and goal-driven learning paths. We release our code and dataset to facilitate future research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13211v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13211v1" target="_blank" rel="noopener noreferrer">
                基于图像与文本分析的、全自动可扩展的低资源语言并行数据增强方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A fully automated and scalable Parallel Data Augmentation for Low Resource Languages using Image and Text Analytics
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Prawaal Sharma, Navneet Goyal, Poonam Goyal, Vishnupriyan R
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注低资源语言的数据增强技术，虽然涉及文本分析，但其核心应用场景是机器翻译等NLP任务，而非推荐系统、搜索或广告领域。论文提到的图像和文本分析可能在某些边缘情况下用于多模态推荐，但缺乏明确的RecSys/Search/Ads应用连接。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:57:23">2025-10-15 06:57:23</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13211v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13211v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Linguistic diversity across the world creates a disparity with the availability of good quality digital language resources thereby restricting the technological benefits to majority of human population. The lack or absence of data resources makes it difficult to perform NLP tasks for low-resource languages. This paper presents a novel scalable and fully automated methodology to extract bilingual parallel corpora from newspaper articles using image and text analytics. We validate our approach by building parallel data corpus for two different language combinations and demonstrate the value of this dataset through a downstream task of machine translation and improve over the current baseline by close to 3 BLEU points.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13202v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13202v1" target="_blank" rel="noopener noreferrer">
                LLM引导的合成增强（LGSA）用于减轻AI系统中的偏见
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            LLM-Guided Synthetic Augmentation (LGSA) for Mitigating Bias in AI Systems
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sai Suhruth Reddy Karri, Yashwanth Sai Nallapuneni, Laxmi Narasimha Reddy Mallir...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及LLM技术，但其核心关注点是偏见缓解，这属于公平性、伦理等非技术性话题，明确列在无关主题中。该论文没有展示在推荐系统、搜索或广告中的直接应用潜力，主要解决的是AI系统的社会技术问题而非核心算法进步。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:42:35">2025-10-15 06:42:35</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13202v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13202v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Bias in AI systems, especially those relying on natural language data, raises ethical and practical concerns. Underrepresentation of certain groups often leads to uneven performance across demographics. Traditional fairness methods, such as pre-processing, in-processing, and post-processing, depend on protected-attribute labels, involve accuracy-fairness trade-offs, and may not generalize across datasets. To address these challenges, we propose LLM-Guided Synthetic Augmentation (LGSA), which uses large language models to generate counterfactual examples for underrepresented groups while preserving label integrity. We evaluated LGSA on a controlled dataset of short English sentences with gendered pronouns, professions, and binary classification labels. Structured prompts were used to produce gender-swapped paraphrases, followed by quality control including semantic similarity checks, attribute verification, toxicity screening, and human spot checks. The augmented dataset expanded training coverage and was used to train a classifier under consistent conditions. Results show that LGSA reduces performance disparities without compromising accuracy. The baseline model achieved 96.7 percent accuracy with a 7.2 percent gender bias gap. Simple swap augmentation reduced the gap to 0.7 percent but lowered accuracy to 95.6 percent. LGSA achieved 99.1 percent accuracy with a 1.9 percent bias gap, improving performance on female-labeled examples. These findings demonstrate that LGSA is an effective strategy for bias mitigation, enhancing subgroup balance while maintaining high task accuracy and label fidelity.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13197v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13197v1" target="_blank" rel="noopener noreferrer">
                基于简化隔离核的文本异常检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Text Anomaly Detection with Simplified Isolation Kernel
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yang Cao, Sikun Yang, Yujiu Yang, Lianyong Qi, Ming Liu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本异常检测这一特定NLP任务，属于纯文本处理范畴，与推荐系统、搜索或广告的核心技术关联度较低。虽然异常检测在理论上可能用于识别异常用户行为，但论文标题明确限定于文本数据，且未提及任何与推荐、搜索或广告相关的应用场景，因此相关性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:35:54">2025-10-15 06:35:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13197v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13197v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Two-step approaches combining pre-trained large language model embeddings and anomaly detectors demonstrate strong performance in text anomaly detection by leveraging rich semantic representations. However, high-dimensional dense embeddings extracted by large language models pose challenges due to substantial memory requirements and high computation time. To address this challenge, we introduce the Simplified Isolation Kernel (SIK), which maps high-dimensional dense embeddings to lower-dimensional sparse representations while preserving crucial anomaly characteristics. SIK has linear time complexity and significantly reduces space complexity through its innovative boundary-focused feature mapping. Experiments across 7 datasets demonstrate that SIK achieves better detection performance than 11 state-of-the-art (SOTA) anomaly detection algorithms while maintaining computational efficiency and low memory cost. All code and demonstrations are available at https://github.com/charles-cao/SIK.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv 2510.13190v1. While the card carries collapsed-level-1, the page-level style rule hides its .paper-details section. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13190v1" target="_blank" rel="noopener noreferrer">
                SHIELD：用于鲁棒且更安全大视觉语言模型的分类器引导提示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- English title: lang="en" marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            SHIELD: Classifier-Guided Prompting for Robust and Safer LVLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Juan Ren, Mark Dras, Usman Naseem
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注大视觉语言模型的安全性和鲁棒性，属于VLM安全增强技术。虽然涉及视觉语言模型，但其核心焦点是安全防护而非异构数据统一建模，与VLM类比异构数据的关注点仅有微弱关联。在推荐/搜索/广告领域，这种安全技术可能用于内容安全过滤，但应用潜力有限且非核心关注方向。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:27:46">2025-10-15 06:27:46</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13190v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13190v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Vision-Language Models (LVLMs) unlock powerful multimodal reasoning but also expand the attack surface, particularly through adversarial inputs that conceal harmful goals in benign prompts. We propose SHIELD, a lightweight, model-agnostic preprocessing framework that couples fine-grained safety classification with category-specific guidance and explicit actions (Block, Reframe, Forward). Unlike binary moderators, SHIELD composes tailored safety prompts that enforce nuanced refusals or safe redirection without retraining. Across five benchmarks and five representative LVLMs, SHIELD consistently lowers jailbreak and non-following rates while preserving utility. Our method is plug-and-play, incurs negligible overhead, and is easily extendable to new attack types — serving as a practical safety patch for both weakly and strongly aligned LVLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13183v1" target="_blank" rel="noopener noreferrer">
                DSCD：基于自约束解码的大语言模型去毒
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DSCD: Large Language Model Detoxification with Self-Constrained Decoding
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ming Dong, Jinkui Zhang, Bolong Zheng, Xinhui Tu, Po Hu, Tingting He
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于大语言模型的安全性和内容过滤问题（去毒），这属于模型安全范畴而非推荐系统、搜索或广告的核心技术。虽然去毒技术可能间接影响内容推荐的质量，但论文本身不涉及排序算法、用户建模或广告投放等核心领域，也不属于Transformer架构改进或异构数据建模等使能技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:10:47">2025-10-15 06:10:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13183v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13183v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Detoxification in large language models (LLMs) remains a significant research challenge. Existing decoding detoxification methods are all based on external constraints, which require additional resource overhead and lose generation fluency. This work proposes Detoxification with Self-Constrained Decoding (DSCD), a novel method for LLM detoxification without parameter fine-tuning. DSCD strengthens the inner next-token distribution of the safety layer while weakening that of hallucination and toxic layers during output generation. This effectively diminishes toxicity and enhances output safety. DSCD offers lightweight, high compatibility, and plug-and-play capabilities, readily integrating with existing detoxification methods for further performance improvement. Extensive experiments on representative open-source LLMs and public datasets validate DSCD's effectiveness, demonstrating state-of-the-art (SOTA) performance in both detoxification and generation fluency, with superior efficiency compared to existing methods. These results highlight DSCD's potential as a practical and scalable solution for safer LLM deployments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13166v1" target="_blank" rel="noopener noreferrer">
                CoT-Evo：用于科学推理的思维链进化蒸馏
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CoT-Evo: Evolutionary Distillation of Chain-of-Thought for Scientific Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kehua Feng, Keyan Ding, Zhihui Zhu, Lei Liang, Qiang Zhang, Huajun Chen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于科学推理领域的思维链蒸馏技术，属于纯粹的NLP推理优化范畴。虽然思维链技术本身是LLM的重要能力，但论文明确限定在科学推理应用场景，没有展示在推荐系统、搜索或广告领域的潜在应用价值，与当前关注的技术方向关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:29:56">2025-10-15 05:29:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13166v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13166v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                While chain-of-thought (CoT) distillation from advanced large language models (LLMs) has proven effective in general reasoning tasks, it struggles in scientific domains where even advanced models often produce incorrect or superficial reasoning due to high complexity and specialized knowledge requirements. Directly distilling from such flawed outputs results in low-quality training data and limits the performance of smaller student models. To overcome this, we propose CoT-Evo, an evolutionary CoT distillation framework. It begins by constructing a diverse pool of reasoning trajectories from multiple LLM thinkers, enriches them with automatically retrieved domain knowledge, and iteratively refines the trajectories using novelty-driven selection, reflective recombination and mutation. The refinement is guided by a fitness function that evaluates answer correctness, coherence, and effective knowledge utilization. This results in a high-quality CoT dataset tailored for scientific reasoning. We employ this evolved dataset to fine-tune a compact model, which achieves state-of-the-art performance on scientific reasoning benchmarks. Our work establishes a scalable approach to synthesizing high-fidelity scientific reasoning data from diverse and fallible LLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13163v1" target="_blank" rel="noopener noreferrer">
                表征问题：基于图的抽象代码生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            A Matter of Representation: Towards Graph-Based Abstract Code Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nyx Iskandar, Hisham Bedri, Andy Tsen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注代码生成任务，属于纯粹的NLP应用领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然图神经网络技术可能在某些推荐系统中使用，但论文的抽象代码生成应用场景与我的关注领域相距甚远。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:26:36">2025-10-15 05:26:36</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13163v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13163v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Most large language models (LLMs) today excel at generating raw, sequential code with minimal abstractions and custom structures. However, there has been little work on graph-based abstract code generation, where significant logic is encapsulated in predefined nodes and execution flow is determined by edges. This is relevant for visual programming languages, and in cases where raw source code is inaccessible to users and LLM training sets. In this work, we propose and evaluate JSON representations for graphs to enable high accuracy graph-based abstract code generation. We evaluate these representations on ScratchTest, a mini-benchmark based on our custom Python re-implementation of Scratch, which tests the LLM in code graph space. Our findings demonstrate that LLMs can indeed perform the aforementioned generation task in a single pass without relying on specialized or complex pipelines, given the correct graph representations. We also show that different representations induce significantly different accuracies, highlighting the instrumental role of representations in this generation task. All in all, this work establishes the first steps towards representation learning for graph-based abstract code generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13157v1" target="_blank" rel="noopener noreferrer">
                用于金融推理的思维程序：利用动态上下文示例和生成式检索
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Program of Thoughts for Financial Reasoning: Leveraging Dynamic In-Context Examples and Generative Retrieval
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Subhendu Khatuya, Shashwat Naidu, Pawan Goyal, Niloy Ganguly
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注金融领域的推理任务，属于特定领域应用而非核心推荐系统、搜索或广告技术。虽然涉及思维链和上下文学习等LLM技术，但缺乏与推荐系统、搜索或广告的直接关联或潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:16:54">2025-10-15 05:16:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13157v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13157v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CE</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Despite continuous advancements in the capabilities of large language models (LLMs), numerical reasoning remains a challenging area. Techniques like chain-of-thought prompting, tree-of-thought prompting, and program-of-thought prompting guide LLMs through intermediate reasoning steps. Although in-context learning with few-shot prompting has improved performance, LLMs still lag behind state-of-the-art models on financial numerical reasoning datasets such as FinQA and ConvFinQA. In this work, we introduce FINDER, a novel two-step framework, to enhance LLMs' capabilities in financial numerical reasoning. The first step utilizes a generative retriever to extract relevant facts from unstructured data, including both text and tables. This is followed by context-aware Program of Thought prompting with dynamic selection of in-context examples. Our model FINDER achieves a new state-of-the-art performance on both the FinQA and ConvFinQA datasets, surpassing previous benchmarks with execution accuracy improvements of 5.98% and 4.05%, respectively.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13154v1" target="_blank" rel="noopener noreferrer">
                我是对齐的，但和谁对齐？面向中东和北非价值观的基准，用于评估大型语言模型的文化对齐与多语言偏见
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            I Am Aligned, But With Whom? MENA Values Benchmark for Evaluating Cultural Alignment and Multilingual Bias in LLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Pardis Sadat Zahraei, Ehsaneddin Asgari
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注LLM的文化对齐评估和多语言偏见检测，这属于伦理评估和公平性范畴，属于明确的无关主题。虽然提到了多语言能力，但核心焦点是价值观对齐评估而非技术改进，对推荐系统、搜索或广告的技术进步没有直接贡献。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:10:57">2025-10-15 05:10:57</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13154v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13154v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce MENAValues, a novel benchmark designed to evaluate the cultural alignment and multilingual biases of large language models (LLMs) with respect to the beliefs and values of the Middle East and North Africa (MENA) region, an underrepresented area in current AI evaluation efforts. Drawing from large-scale, authoritative human surveys, we curate a structured dataset that captures the sociocultural landscape of MENA with population-level response distributions from 16 countries. To probe LLM behavior, we evaluate diverse models across multiple conditions formed by crossing three perspective framings (neutral, personalized, and third-person/cultural observer) with two language modes (English and localized native languages: Arabic, Persian, Turkish). Our analysis reveals three critical phenomena: "Cross-Lingual Value Shifts" where identical questions yield drastically different responses based on language, "Reasoning-Induced Degradation" where prompting models to explain their reasoning worsens cultural alignment, and "Logit Leakage" where models refuse sensitive questions while internal probabilities reveal strong hidden preferences. We further demonstrate that models collapse into simplistic linguistic categories when operating in native languages, treating diverse nations as monolithic entities. MENAValues offers a scalable framework for diagnosing cultural misalignment, providing both empirical insights and methodological tools for developing more culturally inclusive AI.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13139v1" target="_blank" rel="noopener noreferrer">
                解决交通政策制定中的对齐问题：一种大语言模型方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Addressing the alignment problem in transportation policy making: an LLM approach
        </div>

        <!-- NOTE(review): source data read "Yu, Nie", splitting one author into two entries; confirm against arXiv metadata. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiaoyu Yan, Tianxing Dai, Yu Nie
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及LLM应用，但聚焦于交通政策制定这一特定领域，与推荐系统、搜索或广告的核心技术领域相关性较弱。交通政策属于城市规划和公共管理范畴，不属于当前关注的RecSys/Search/Ads技术领域。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T04:36:38">2025-10-15 04:36:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13139v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13139v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CY</span><span class="category-tag">cs.CE</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.MA</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                A key challenge in transportation planning is that the collective preferences of heterogeneous travelers often diverge from the policies produced by model-driven decision tools. This misalignment frequently results in implementation delays or failures. Here, we investigate whether large language models (LLMs), noted for their capabilities in reasoning and simulating human decision-making, can help inform and address this alignment problem. We develop a multi-agent simulation in which LLMs, acting as agents representing residents from different communities in a city, participate in a referendum on a set of transit policy proposals. Using chain-of-thought reasoning, LLM agents provide ranked-choice or approval-based preferences, which are aggregated using instant-runoff voting (IRV) to model democratic consensus. We implement this simulation framework with both GPT-4o and Claude-3.5, and apply it for Chicago and Houston. Our findings suggest that LLM agents are capable of approximating plausible collective preferences and responding to local context, while also displaying model-specific behavioral biases and modest divergences from optimization-based benchmarks. These capabilities underscore both the promise and limitations of LLMs as tools for solving the alignment problem in transportation decision-making.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13117v1" target="_blank" rel="noopener noreferrer">
                关于掩码扩散语言模型推理能力的研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            On the Reasoning Abilities of Masked Diffusion Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Anej Svete, Ashish Sabharwal
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要研究扩散语言模型的推理能力，属于纯粹的LLM能力评估范畴。虽然推理能力在理论上可能对推荐和搜索有帮助，但论文本身聚焦于基础模型能力分析，没有明确展示在推荐系统、搜索或广告领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T03:29:26">2025-10-15 03:29:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13117v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13117v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.LG</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Masked diffusion models (MDMs) for text offer a compelling alternative to traditional autoregressive language models. Parallel generation makes them efficient, but their computational capabilities and the limitations inherent to their parallelism remain largely unexplored. To this end, we characterize what types of reasoning problems MDMs can provably solve and how efficiently. We do this by connecting MDMs to the well-understood reasoning frameworks of chain of thought (CoT) and padded looped transformers (PLTs) in the finite-precision log-width setting: We show that MDMs and polynomially-padded PLTs are, in fact, equivalent in this setting, and that MDMs can solve all problems that CoT-augmented transformers can. Moreover, we showcase classes of problems (including regular languages) for which MDMs are inherently more efficient than CoT transformers, where parallel generation allows for substantially faster reasoning.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13808v1" target="_blank" rel="noopener noreferrer">
                VisCoP：面向视觉语言模型视频领域自适应的视觉探测方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Detail section; the page's inline CSS hides .paper-details under .collapsed-level-1. -->
    <div class="paper-details">
        <!-- Original English title; lang="en" overrides the document's zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            VisCoP: Visual Probing for Video Domain Adaptation of Vision Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Dominick Reilly, Manish Kumar Govind, Le Xue, Srijan Das
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉语言模型在视频领域的适应性问题，属于计算机视觉和视频理解领域。虽然提到了视觉语言模型，但其应用场景（视频领域适应）与推荐系统、搜索或广告的核心技术关联度很低。视频领域适应技术可能对多媒体内容理解有一定帮助，但这种间接联系在当前聚焦范围内相关性较弱。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and "|" separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:59:52">2025-10-15 17:59:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13808v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13808v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full abstract (English source text). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Vision-Language Models (VLMs) excel at general visual reasoning tasks but exhibit sharp performance degradation when applied to novel domains with substantial distribution shifts from pretraining data. Existing domain adaptation approaches finetune different VLM components, but this often results in limited domain-specific feature learning or catastrophic forgetting of prior capabilities. To address these issues, we introduce Vision Contextualized Probing (VisCoP), which augments the VLM's vision encoder with a compact set of learnable visual probes. These probes enable efficient domain-specific adaptation with minimal modification to pretrained parameters. We evaluate VisCoP across three challenging domain adaptation settings-cross-view (exocentric to egocentric), cross-modal (RGB to depth), and cross-task (human understanding to robot control). Experiments show that VisCoP consistently outperforms existing adaptation strategies, achieving superior performance on target domains while effectively retaining source-domain knowledge.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13802v1" target="_blank" rel="noopener noreferrer">
                追踪万物：通过轨迹场实现任意视频的4D表示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Trace Anything: Representing Any Video in 4D via Trajectory Fields
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinhang Liu, Yuxi Xiao, Donny Y. Chen, Jiashi Feng, Yu-Wing Tai, Chi-Keung Tang,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视频理解和4D表示学习，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联较弱。虽然轨迹场技术理论上可以用于建模用户行为序列，但论文本身没有明确展示在RecSys/Search/Ads领域的直接应用潜力。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:59:04">2025-10-15 17:59:04</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13802v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13802v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Effective spatio-temporal representation is fundamental to modeling, understanding, and predicting dynamics in videos. The atomic unit of a video, the pixel, traces a continuous 3D trajectory over time, serving as the primitive element of dynamics. Based on this principle, we propose representing any video as a Trajectory Field: a dense mapping that assigns a continuous 3D trajectory function of time to each pixel in every frame. With this representation, we introduce Trace Anything, a neural network that predicts the entire trajectory field in a single feed-forward pass. Specifically, for each pixel in each frame, our model predicts a set of control points that parameterizes a trajectory (i.e., a B-spline), yielding its 3D position at arbitrary query time instants. We trained the Trace Anything model on large-scale 4D data, including data from our new platform, and our experiments demonstrate that: (i) Trace Anything achieves state-of-the-art performance on our new benchmark for trajectory field estimation and performs competitively on established point-tracking benchmarks; (ii) it offers significant efficiency gains thanks to its one-pass paradigm, without requiring iterative optimization or auxiliary estimators; and (iii) it exhibits emergent abilities, including goal-conditioned manipulation, motion forecasting, and spatio-temporal fusion. Project page: https://trace-anything.github.io/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13800v1" target="_blank" rel="noopener noreferrer">
                通过世界中的接地在空间中进行推理
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Reasoning in Space via Grounding in the World
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yiming Chen, Zekun Qi, Wenyao Zhang, Xin Jin, Li Zhang, Peidong Liu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题暗示了空间推理和物理世界接地，这主要与具身AI、机器人技术或物理世界交互相关。虽然空间推理在某些搜索场景中可能有边缘应用，但缺乏与推荐系统、广告或核心LLM技术的直接联系，且未明确涉及Transformer架构或异构数据建模。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:58:08">2025-10-15 17:58:08</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13800v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13800v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we claim that 3D visual grounding is the cornerstone of spatial reasoning and introduce the Grounded-Spatial Reasoner (GS-Reasoner) to explore the effective spatial representations that bridge the gap between them. Existing 3D LLMs suffer from the absence of a unified 3D representation capable of jointly capturing semantic and geometric information. This deficiency is manifested either in poor performance on grounding or in an excessive reliance on external modules, ultimately hindering the seamless integration of grounding and spatial reasoning. To address this, we propose a simple yet effective dual-path pooling mechanism that tightly aligns geometric features with both semantic and positional cues, constructing a unified image patch-based 3D representation that encapsulates all essential information without increasing the number of input tokens. Leveraging this holistic representation, GS-Reasoner is the first 3D LLM that achieves autoregressive grounding entirely without external modules while delivering performance comparable to state-of-the-art models, establishing a unified and self-contained framework for 3D spatial reasoning. To further bridge grounding and spatial reasoning, we introduce the Grounded Chain-of-Thought (GCoT) dataset. This dataset is meticulously curated to include both 3D bounding box annotations for objects referenced in reasoning questions and step-by-step reasoning paths that integrate grounding as a core component of the problem-solving process. Extensive experiments demonstrate that GS-Reasoner achieves impressive results on 3D visual grounding, which in turn significantly enhances its spatial reasoning capabilities, leading to state-of-the-art performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13795v1" target="_blank" rel="noopener noreferrer">
                Bee：一个高质量语料库与全栈套件，用于解锁先进的完全开放多模态大语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Bee: A High-Quality Corpus and Full-Stack Suite to Unlock Advanced Fully Open MLLMs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi Zhang, Bolin Ni, Xin-Sheng Chen, Heng-Rui Zhang, Yongming Rao, Houwen Peng, Q...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态大语言模型（MLLMs）的语料库构建和开源工具套件，属于纯粹的LLM技术基础设施。虽然多模态建模与VLM类比有概念关联，但论文没有明确展示在推荐系统、搜索或广告中的具体应用潜力，且更偏向通用多模态AI而非特定领域应用。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:52:59">2025-10-15 17:52:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13795v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13795v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Fully open multimodal large language models (MLLMs) currently lag behind proprietary counterparts, primarily due to a significant gap in data quality for supervised fine-tuning (SFT). Existing open-source datasets are often plagued by widespread noise and a critical deficit in complex reasoning data, such as Chain-of-Thought (CoT), which hinders the development of advanced model capabilities. Addressing these challenges, our work makes three primary contributions. First, we introduce Honey-Data-15M, a new SFT dataset comprising approximately 15 million QA pairs, processed through multiple cleaning techniques and enhanced with a novel dual-level (short and long) CoT enrichment strategy. Second, we introduce HoneyPipe, the data curation pipeline, and its underlying framework DataStudio, providing the community with a transparent and adaptable methodology for data curation that moves beyond static dataset releases. Finally, to validate our dataset and pipeline, we train Bee-8B, an 8B model on Honey-Data-15M. Experiments show that Bee-8B establishes a new state-of-the-art (SOTA) for fully open MLLMs, achieving performance that is competitive with, and in some cases surpasses, recent semi-open models such as InternVL3.5-8B. Our work delivers to the community a suite of foundational resources, including: the Honey-Data-15M corpus; the full-stack suite comprising HoneyPipe and DataStudio; training recipes; an evaluation harness; and the model weights. This effort demonstrates that a principled focus on data quality is a key pathway to developing fully open MLLMs that are highly competitive with their semi-open counterparts.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13768v1" target="_blank" rel="noopener noreferrer">
                使用平面映射扩展视觉变换器用于功能性磁共振成像
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Scaling Vision Transformers for Functional MRI with Flat Maps
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Connor Lane, Daniel Z. Kaplan, Tanishq Mathew Abraham, Paul S. Scotti
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像（fMRI）领域的视觉变换器应用，属于明确的无关主题范畴。虽然涉及Transformer架构扩展，但其在功能性磁共振成像的特定医学应用与推荐系统、搜索或广告领域没有明显的技术关联或潜在应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:15:00">2025-10-15 17:15:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13768v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13768v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">q-bio.NC</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                A key question for adapting modern deep learning architectures to functional MRI (fMRI) is how to represent the data for model input. To bridge the modality gap between fMRI and natural images, we transform the 4D volumetric fMRI data into videos of 2D fMRI activity flat maps. We train Vision Transformers on 2.3K hours of fMRI flat map videos from the Human Connectome Project using the spatiotemporal masked autoencoder (MAE) framework. We observe that masked fMRI modeling performance improves with dataset size according to a strict power scaling law. Downstream classification benchmarks show that our model learns rich representations supporting both fine-grained state decoding across subjects, as well as subject-specific trait decoding across changes in brain state. This work is part of an ongoing open science project to build foundation models for fMRI data. Our code and datasets are available at https://github.com/MedARC-AI/fmri-fm.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13756v1" target="_blank" rel="noopener noreferrer">
                RECODE：通过代码生成进行推理的视觉问答方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            RECODE: Reasoning Through Code Generation for Visual Question Answering
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Junhong Shen, Mu Cai, Bo Hu, Ameet Talwalkar, David A Ross, Cordelia Schmid, Ali...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉问答领域的代码生成推理方法，属于纯粹的视觉-语言交叉研究。虽然涉及多模态建模，但其应用场景和核心方法都局限于视觉问答这一特定NLP任务，与推荐系统、搜索或广告领域没有明显的技术迁移路径或应用潜力。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T17:05:37">2025-10-15 17:05:37</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13756v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13756v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multimodal Large Language Models (MLLMs) struggle with precise reasoning for structured visuals like charts and diagrams, as pixel-based perception lacks a mechanism for verification. To address this, we propose to leverage derendering -- the process of reverse-engineering visuals into executable code -- as a new modality for verifiable visual reasoning. Specifically, we propose RECODE, an agentic framework that first generates multiple candidate programs to reproduce the input image. It then uses a critic to select the most faithful reconstruction and iteratively refines the code. This process not only transforms an ambiguous perceptual task into a verifiable, symbolic problem, but also enables precise calculations and logical inferences later on. On various visual reasoning benchmarks such as CharXiv, ChartQA, and Geometry3K, RECODE significantly outperforms methods that do not leverage code or only use code for drawing auxiliary lines or cropping. Our work demonstrates that grounding visual perception in executable code provides a new path toward more accurate and verifiable multimodal reasoning.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13747v1" target="_blank" rel="noopener noreferrer">
                InteractiveOmni：面向音视频多轮对话的统一全模态模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            InteractiveOmni: A Unified Omni-modal Model for Audio-Visual Multi-turn Dialogue
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Wenwen Tong, Hewei Guo, Dongchuan Ran, Jiangnan Chen, Jiefan Lu, Kaibin Wang, Ke...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文虽然涉及多模态建模，但其核心聚焦于音视频对话系统，这与推荐系统、搜索或广告的核心技术需求关联度较低。尽管多模态统一建模的思想可能启发异构数据处理，但缺乏明确的RecSys/Search/Ads应用场景，因此相关性有限。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:52:48">2025-10-15 16:52:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13747v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13747v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce InteractiveOmni, a unified and open-source omni-modal large language model for audio-visual multi-turn interaction, ranging from 4B to 8B parameters, designed to lead the field of lightweight models by offering comprehensive omni-modal understanding and speech generation capabilities. To achieve this, we integrate the vision encoder, audio encoder, large language model, and speech decoder into a unified model for understanding and generation tasks. We design a multi-stage training strategy to ensure robust cross-modal capabilities, including pre-training for omni-modal understanding, followed by post-training with speech conversation and audio-visual interaction. To enable human-like long-term conversational ability, we meticulously curate a multi-turn training dataset that enhances the model's ability to handle complex and multi-turn interactions. To effectively evaluate the multi-turn memory and speech interaction capabilities, we construct the multi-modal multi-turn memory benchmark and the multi-turn speech interaction benchmark. Experiments demonstrate that InteractiveOmni significantly outperforms leading open-source models and provides a more intelligent multi-turn audio-visual experience, particularly in its long-term memory capabilities. Notably, InteractiveOmni-4B is comparable to the much larger model like Qwen2.5-Omni-7B on general benchmarks, and it can retain 97% of the performance of the InteractiveOmni-8B while utilizing only 50% of the model size. Achieving state-of-the-art results against similarly sized models across image, audio, video understanding, and speech generation tasks, InteractiveOmni is an accessible, open-source foundation for next-generation intelligent interactive systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13740v1" target="_blank" rel="noopener noreferrer">
                用于高效视觉图神经网络的多尺度高分辨率对数图绘制模块
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Multi-Scale High-Resolution Logarithmic Grapher Module for Efficient Vision GNNs
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Mustafa Munir, Alex Zhang, Radu Marculescu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉图神经网络(GNN)的效率优化，属于计算机视觉架构改进。虽然提到了多尺度特征处理，但缺乏与推荐系统、搜索或广告领域的直接关联。论文专注于视觉GNN的特定模块设计，没有展示在异构数据处理或序列建模方面的潜在应用价值。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:47:09">2025-10-15 16:47:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13740v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13740v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision graph neural networks (ViG) have demonstrated promise in vision tasks as a competitive alternative to conventional convolutional neural nets (CNN) and transformers (ViTs); however, common graph construction methods, such as k-nearest neighbor (KNN), can be expensive on larger images. While methods such as Sparse Vision Graph Attention (SVGA) have shown promise, SVGA's fixed step scale can lead to over-squashing and missing multiple connections to gain the same information that could be gained from a long-range link. Through this observation, we propose a new graph construction method, Logarithmic Scalable Graph Construction (LSGC) to enhance performance by limiting the number of long-range links. To this end, we propose LogViG, a novel hybrid CNN-GNN model that utilizes LSGC. Furthermore, inspired by the successes of multi-scale and high-resolution architectures, we introduce and apply a high-resolution branch and fuse features between our high-resolution and low-resolution branches for a multi-scale high-resolution Vision GNN network. Extensive experiments show that LogViG beats existing ViG, CNN, and ViT architectures in terms of accuracy, GMACs, and parameters on image classification and semantic segmentation tasks. Our smallest model, Ti-LogViG, achieves an average top-1 accuracy on ImageNet-1K of 79.9% with a standard deviation of 0.2%, 1.7% higher average accuracy than Vision GNN with a 24.3% reduction in parameters and 35.3% reduction in GMACs. Our work shows that leveraging long-range links in graph construction for ViGs through our proposed LSGC can exceed the performance of current state-of-the-art ViGs. Code is available at https://github.com/mmunir127/LogViG-Official.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13702v1" target="_blank" rel="noopener noreferrer">
                MVCustom：通过几何潜在渲染与补全实现多视图定制化扩散
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries collapsed-level-1 (".collapsed-level-1 .paper-details" rule in the page styles). -->
    <div class="paper-details">
        <!-- Original English title; marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MVCustom: Multi-View Customized Diffusion via Geometric Latent Rendering and Completion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Minjung Shin, Hyunin Cho, Sooyeon Go, Jin-Hwa Kim, Youngjung Uh
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要涉及多视图扩散模型和几何潜在渲染，属于计算机视觉和3D生成领域。虽然扩散模型是LLM相关技术，但该工作的核心应用场景是3D内容生成和视图定制，与推荐系统、搜索或广告的排序任务没有直接关联。其技术路径更偏向纯粹的视觉生成，而非我关注的异构数据建模或推荐应用。</p>
        </div>

        <!-- Meta row: timestamp, arXiv link, category tags. The datetime value has no offset because the source gives no timezone. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:00:26">2025-10-15 16:00:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13702v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13702v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multi-view generation with camera pose control and prompt-based customization are both essential elements for achieving controllable generative models. However, existing multi-view generation models do not support customization with geometric consistency, whereas customization models lack explicit viewpoint control, making them challenging to unify. Motivated by these gaps, we introduce a novel task, multi-view customization, which aims to jointly achieve multi-view camera pose control and customization. Due to the scarcity of training data in customization, existing multi-view generation models, which inherently rely on large-scale datasets, struggle to generalize to diverse prompts. To address this, we propose MVCustom, a novel diffusion-based framework explicitly designed to achieve both multi-view consistency and customization fidelity. In the training stage, MVCustom learns the subject's identity and geometry using a feature-field representation, incorporating the text-to-video diffusion backbone enhanced with dense spatio-temporal attention, which leverages temporal coherence for multi-view consistency. In the inference stage, we introduce two novel techniques: depth-aware feature rendering explicitly enforces geometric consistency, and consistent-aware latent completion ensures accurate perspective alignment of the customized subject and surrounding backgrounds. Extensive experiments demonstrate that MVCustom is the only framework that simultaneously achieves faithful multi-view generation and customization.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13698v1" target="_blank" rel="noopener noreferrer">
                面向安全多模态大语言模型的风险自适应激活导向
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Risk-adaptive Activation Steering for Safe Multimodal Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jonghyun Park, Minhyuk Seo, Jonghyun Choi
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态LLM的安全性问题，属于安全控制机制的研究。虽然涉及多模态和激活导向技术，但其核心焦点是安全风险控制而非推荐/搜索/广告系统的性能提升。安全主题属于明确排除的无关主题范畴，且论文未展示在推荐系统或搜索广告中的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:57:17">2025-10-15 15:57:17</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13698v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13698v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                One of the key challenges of modern AI models is ensuring that they provide helpful responses to benign queries while refusing malicious ones. But often, the models are vulnerable to multimodal queries with harmful intent embedded in images. One approach for safety alignment is training with extensive safety datasets at the significant costs in both dataset curation and training. Inference-time alignment mitigates these costs, but introduces two drawbacks: excessive refusals from misclassified benign queries and slower inference speed due to iterative output adjustments. To overcome these limitations, we propose to reformulate queries to strengthen cross-modal attention to safety-critical image regions, enabling accurate risk assessment at the query level. Using the assessed risk, it adaptively steers activations to generate responses that are safe and helpful without overhead from iterative output adjustments. We call this Risk-adaptive Activation Steering (RAS). Extensive experiments across multiple benchmarks on multimodal safety and utility demonstrate that the RAS significantly reduces attack success rates, preserves general task performance, and improves inference speed over prior inference-time defenses.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13684v1" target="_blank" rel="noopener noreferrer">
                使用去噪扩散桥模型生成健康反事实
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Generating healthy counterfactuals with denoising diffusion bridge models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ana Lawry Aguila, Peirong Liu, Marina Crespo Aguirre, Juan Eugenio Iglesias
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注健康领域的反事实生成，属于特定领域应用而非核心推荐系统、搜索或广告技术。虽然扩散模型是生成式AI的重要技术，但论文的应用场景（健康领域）和焦点（反事实生成）与当前关注的核心领域进展和直接LLM应用相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:40:57">2025-10-15 15:40:57</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13684v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13684v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Generating healthy counterfactuals from pathological images holds significant promise in medical imaging, e.g., in anomaly detection or for application of analysis tools that are designed for healthy scans. These counterfactuals should represent what a patient's scan would plausibly look like in the absence of pathology, preserving individual anatomical characteristics while modifying only the pathological regions. Denoising diffusion probabilistic models (DDPMs) have become popular methods for generating healthy counterfactuals of pathology data. Typically, this involves training on solely healthy data with the assumption that a partial denoising process will be unable to model disease regions and will instead reconstruct a closely matched healthy counterpart. More recent methods have incorporated synthetic pathological images to better guide the diffusion process. However, it remains challenging to guide the generative process in a way that effectively balances the removal of anomalies with the retention of subject-specific features. To solve this problem, we propose a novel application of denoising diffusion bridge models (DDBMs) - which, unlike DDPMs, condition the diffusion process not only on the initial point (i.e., the healthy image), but also on the final point (i.e., a corresponding synthetically generated pathological image). Treating the pathological image as a structurally informative prior enables us to generate counterfactuals that closely match the patient's anatomy while selectively removing pathology. The results show that our DDBM outperforms previously proposed diffusion models and fully supervised approaches at segmentation and anomaly detection tasks.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13643v1" target="_blank" rel="noopener noreferrer">
                基于DINOv2的小样本异常检测中的对抗鲁棒性与不确定性量化研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Towards Adversarial Robustness and Uncertainty Quantification in DINOv2-based Few-Shot Anomaly Detection
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Akib Mohammed Khan, Bartosz Krawczyk
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的异常检测，涉及对抗鲁棒性和不确定性量化等安全相关主题。虽然DINOv2是视觉Transformer模型，但论文焦点在异常检测而非推荐/搜索/广告应用，且对抗鲁棒性属于安全范畴，属于应排除的主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:06:45">2025-10-15 15:06:45</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13643v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13643v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Foundation models such as DINOv2 have shown strong performance in few-shot anomaly detection, yet two key questions remain unexamined: (i) how susceptible are these detectors to adversarial perturbations; and (ii) how well do their anomaly scores reflect calibrated uncertainty? Building on AnomalyDINO, a training-free deep nearest-neighbor detector over DINOv2 features, we present one of the first systematic studies of adversarial attacks and uncertainty estimation in this setting. To enable white-box gradient attacks while preserving test-time behavior, we attach a lightweight linear head to frozen DINOv2 features only for crafting perturbations. Using this heuristic, we evaluate the impact of FGSM across the MVTec-AD and VisA datasets and observe consistent drops in F1, AUROC, AP, and G-mean, indicating that imperceptible perturbations can flip nearest-neighbor relations in feature space to induce confident misclassification. Complementing robustness, we probe reliability and find that raw anomaly scores are poorly calibrated, revealing a gap between confidence and correctness that limits safety-critical use. As a simple, strong baseline toward trustworthiness, we apply post-hoc Platt scaling to the anomaly scores for uncertainty estimation. The resulting calibrated posteriors yield significantly higher predictive entropy on adversarially perturbed inputs than on clean ones, enabling a practical flagging mechanism for attack detection while reducing calibration error (ECE). Our findings surface concrete vulnerabilities in DINOv2-based few-shot anomaly detectors and establish an evaluation protocol and baseline for robust, uncertainty-aware anomaly detection. We argue that adversarial robustness and principled uncertainty quantification are not optional add-ons but essential capabilities if anomaly detection systems are to be trustworthy and ready for real-world deployment.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13620v1" target="_blank" rel="noopener noreferrer">
                融合遇见多样条件：基于无人机多模态目标检测的高多样性基准与基线方法（含条件线索）
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Fusion Meets Diverse Conditions: A High-diversity Benchmark and Baseline for UAV-based Multimodal Object Detection with Condition Cues
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chen Chen, Kangcheng Bin, Ting Hu, Jiahao Qi, Xingyue Liu, Tianpeng Liu, Zhen Li...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注无人机多模态目标检测，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然涉及多模态融合技术，但其特定于无人机视觉检测的应用场景难以直接迁移到RecSys/Search/Ads领域，潜在应用价值有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:50:37">2025-10-15 14:50:37</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13620v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13620v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The raw LaTeX degree macro from the arXiv source is rendered as the degree sign. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Unmanned aerial vehicles (UAV)-based object detection with visible (RGB) and infrared (IR) images facilitates robust around-the-clock detection, driven by advancements in deep learning techniques and the availability of high-quality dataset. However, the existing dataset struggles to fully capture real-world complexity for limited imaging conditions. To this end, we introduce a high-diversity dataset ATR-UMOD covering varying scenarios, spanning altitudes from 80m to 300m, angles from 0° to 75°, and all-day, all-year time variations in rich weather and illumination conditions. Moreover, each RGB-IR image pair is annotated with 6 condition attributes, offering valuable high-level contextual information. To meet the challenge raised by such diverse conditions, we propose a novel prompt-guided condition-aware dynamic fusion (PCDF) to adaptively reassign multimodal contributions by leveraging annotated condition cues. By encoding imaging conditions as text prompts, PCDF effectively models the relationship between conditions and multimodal contributions through a task-specific soft-gating transformation. A prompt-guided condition-decoupling module further ensures the availability in practice without condition annotations. Experiments on ATR-UMOD dataset reveal the effectiveness of PCDF.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13565v1" target="_blank" rel="noopener noreferrer">
                XD-RCDepth：具有可解释性对齐和分布感知蒸馏的轻量级雷达-相机深度估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            XD-RCDepth: Lightweight Radar-Camera Depth Estimation with Explainability-Aligned and Distribution-Aware Distillation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Huawei Sun, Zixu Wang, Xiangyuan Peng, Julius Ott, Georg Stettinger, Lorenzo Ser...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">虽然论文涉及多模态融合（雷达和相机），但这主要属于计算机视觉领域的深度估计任务，与推荐系统、搜索或广告的核心技术没有直接关联。该技术可能在某些边缘计算场景中有应用，但缺乏明确的RecSys/Search/Ads应用场景，因此相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:05:33">2025-10-15 14:05:33</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13565v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13565v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Depth estimation remains central to autonomous driving, and radar-camera fusion offers robustness in adverse conditions by providing complementary geometric cues. In this paper, we present XD-RCDepth, a lightweight architecture that reduces the parameters by 29.7% relative to the state-of-the-art lightweight baseline while maintaining comparable accuracy. To preserve performance under compression and enhance interpretability, we introduce two knowledge-distillation strategies: an explainability-aligned distillation that transfers the teacher's saliency structure to the student, and a depth-distribution distillation that recasts depth regression as soft classification over discretized bins. Together, these components reduce the MAE compared with direct training with 7.97% and deliver competitive accuracy with real-time efficiency on nuScenes and ZJU-4DRadarCam datasets.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13557v1" target="_blank" rel="noopener noreferrer">
                基于自适应代理的面部表情识别中的文化偏见建模
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Modeling Cultural Bias in Facial Expression Recognition with Adaptive Agents
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>David Freire-Obregón, José Salas-Cáceres, Javier Lorenzo-Navarro, Oliverio J. Sa...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注面部表情识别中的文化偏见问题，这属于计算机视觉和公平性研究的交叉领域。虽然提到了建模和自适应技术，但其核心焦点是文化偏见这一非技术性话题，与推荐系统、搜索或广告的核心技术进展相关性极低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T13:53:30">2025-10-15 13:53:30</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13557v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13557v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Facial expression recognition (FER) must remain robust under both cultural variation and perceptually degraded visual conditions, yet most existing evaluations assume homogeneous data and high-quality imagery. We introduce an agent-based, streaming benchmark that reveals how cross-cultural composition and progressive blurring interact to shape face recognition robustness. Each agent operates in a frozen CLIP feature space with a lightweight residual adapter trained online at sigma=0 and fixed during testing. Agents move and interact on a 5x5 lattice, while the environment provides inputs with sigma-scheduled Gaussian blur. We examine monocultural populations (Western-only, Asian-only) and mixed environments with balanced (5/5) and imbalanced (8/2, 2/8) compositions, as well as different spatial contact structures. Results show clear asymmetric degradation curves between cultural groups: JAFFE (Asian) populations maintain higher performance at low blur but exhibit sharper drops at intermediate stages, whereas KDEF (Western) populations degrade more uniformly. Mixed populations exhibit intermediate patterns, with balanced mixtures mitigating early degradation, but imbalanced settings amplify majority-group weaknesses under high blur. These findings quantify how cultural composition and interaction structure influence the robustness of FER as perceptual conditions deteriorate.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13534v1" target="_blank" rel="noopener noreferrer">
                用于复杂情感持续学习的高语义特征：一种轻量级解决方案
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            High Semantic Features for the Continual Learning of Complex Emotions: a Lightweight Solution
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Thibault Geoffroy, gauthier Gerspacher, Lionel Prevost
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注情感计算和持续学习，属于NLP领域的情感分析方向。虽然提及了轻量级解决方案，但论文的核心焦点是复杂情感建模，这与推荐系统、搜索或广告的核心技术需求关联度较低，缺乏明确的RecSys/Search/Ads应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T13:27:41">2025-10-15 13:27:41</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13534v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13534v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Incremental learning is a complex process due to potential catastrophic forgetting of old tasks when learning new ones. This is mainly due to transient features that do not fit from task to task. In this paper, we focus on complex emotion recognition. First, we learn basic emotions and then, incrementally, like humans, complex emotions. We show that Action Units, describing facial muscle movements, are non-transient, highly semantical features that outperform those extracted by both shallow and deep convolutional neural networks. Thanks to this ability, our approach achieves interesting results when learning incrementally complex, compound emotions with an accuracy of 0.75 on the CFEE dataset and can be favorably compared to state-of-the-art results. Moreover, it results in a lightweight model with a small memory footprint.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Markup for one collapsed paper card; its .paper-details section is hidden by the .collapsed-level-1 rule until expanded.
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header row: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13464v1" target="_blank" rel="noopener noreferrer">
                透过怀疑的视角：视觉位置识别的鲁棒高效不确定性估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Hidden while the card carries .collapsed-level-1 (see the page's inline stylesheet). -->
    <div class="paper-details">
        <!-- Original English title; lang="en" because the page language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Through the Lens of Doubt: Robust and Efficient Uncertainty Estimation for Visual Place Recognition
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Emily Miller, Michael Milford, Muhammad Burhan Hafez, SD Ramchurn, Shoaib Ehsan
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的位置识别和不确定性估计，属于纯粹的视觉技术领域。虽然不确定性估计在推荐系统中可能有间接应用，但该论文没有展示与推荐、搜索或广告系统的明确联系，也没有涉及LLM或Transformer架构的进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T12:12:55">2025-10-15 12:12:55</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13464v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13464v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Visual Place Recognition (VPR) enables robots and autonomous vehicles to identify previously visited locations by matching current observations against a database of known places. However, VPR systems face significant challenges when deployed across varying visual environments, lighting conditions, seasonal changes, and viewpoints changes. Failure-critical VPR applications, such as loop closure detection in simultaneous localization and mapping (SLAM) pipelines, require robust estimation of place matching uncertainty. We propose three training-free uncertainty metrics that estimate prediction confidence by analyzing inherent statistical patterns in similarity scores from any existing VPR method. Similarity Distribution (SD) quantifies match distinctiveness by measuring score separation between candidates; Ratio Spread (RS) evaluates competitive ambiguity among top-scoring locations; and Statistical Uncertainty (SU) is a combination of SD and RS that provides a unified metric that generalizes across datasets and VPR methods without requiring validation data to select the optimal metric. All three metrics operate without additional model training, architectural modifications, or computationally expensive geometric verification. Comprehensive evaluation across nine state-of-the-art VPR methods and six benchmark datasets confirms that our metrics excel at discriminating between correct and incorrect VPR matches, and consistently outperform existing approaches while maintaining negligible computational overhead, making it deployable for real-time robotic applications across varied environmental conditions with improved precision-recall performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13394v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13394v1" target="_blank" rel="noopener noreferrer">
                Spatial-DISE：评估视觉语言模型中空间推理能力的统一基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Spatial-DISE: A Unified Benchmark for Evaluating Spatial Reasoning in Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinmiao Huang, Qisong He, Zhenglin Huang, Boxuan Wang, Zhuoyun Li, Guangliang Ch...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视觉语言模型的评估基准，属于纯粹的VLM评估范畴，与推荐系统、搜索或广告的核心技术进展无关。虽然提到了空间推理，但缺乏将这些能力转化为推荐/搜索/广告应用的明确路径或潜力说明。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T10:44:01">2025-10-15 10:44:01</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13394v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13394v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Spatial reasoning ability is crucial for Vision Language Models (VLMs) to support real-world applications in diverse domains including robotics, augmented reality, and autonomous navigation. Unfortunately, existing benchmarks are inadequate in assessing spatial reasoning ability, especially the <em>intrinsic-dynamic</em> spatial reasoning which is a fundamental aspect of human spatial cognition. In this paper, we propose a unified benchmark, <b>Spatial-DISE</b>, based on a cognitively grounded taxonomy that categorizes tasks into four fundamental quadrants: <b>I</b>ntrinsic-<b>S</b>tatic, Intrinsic-<b>D</b>ynamic, <b>E</b>xtrinsic-Static, and Extrinsic-Dynamic spatial reasoning. Moreover, to address the issue of data scarcity, we develop a scalable and automated pipeline to generate diverse and verifiable spatial reasoning questions, resulting in a new <b>Spatial-DISE</b> dataset that includes Spatial-DISE Bench (559 evaluation VQA pairs) and Spatial-DISE-12K (12K+ training VQA pairs). Our comprehensive evaluation across 28 state-of-the-art VLMs reveals that, current VLMs have a large and consistent gap to human competence, especially on multi-step multi-view spatial reasoning. Spatial-DISE offers a robust framework, valuable dataset, and clear direction for future research toward human-like spatial intelligence. Benchmark, dataset, and code will be publicly released.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13390v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13390v1" target="_blank" rel="noopener noreferrer">
                通过大模型感知语义蒸馏与对齐实现WiFi手势识别的泛化
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Generalizing WiFi Gesture Recognition via Large-Model-Aware Semantic Distillation and Alignment
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Feng-Qi Cui, Yu-Tong Guo, Tianyue Zheng, Jinyang Huang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注WiFi手势识别这一特定感知任务，属于计算机视觉和信号处理领域。虽然提到了大模型和语义蒸馏技术，但其应用场景（手势识别）与推荐系统、搜索或广告的核心技术栈关联性较弱，缺乏明确的跨领域应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T10:28:50">2025-10-15 10:28:50</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13390v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13390v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                WiFi-based gesture recognition has emerged as a promising RF sensing paradigm for enabling non-contact and privacy-preserving human-computer interaction in AIoT environments. However, existing methods often suffer from limited generalization and semantic expressiveness due to the domain-sensitive nature of Channel State Information and the lack of high-level gesture abstraction. To address these challenges, we propose a novel generalization framework, termed Large-Model-Aware Semantic Distillation and Alignment (GLSDA), which leverages the semantic prior of pre-trained large foundation models to enhance gesture representation learning in both in-domain and cross-domain scenarios. Specifically, we first design a dual-path CSI encoding pipeline that captures geometric and dynamic gesture patterns via CSI-Ratio phase sequences and Doppler spectrograms. These representations are then fed into a Multiscale Semantic Encoder, which learns robust temporal embeddings and aligns them with gesture semantics through cross-modal attention mechanisms. To further enhance category discrimination, we introduce a Semantic-Aware Soft Supervision scheme that encodes inter-class correlations and reduces label ambiguity, especially for semantically similar gestures. Finally, we develop a Robust Dual-Distillation strategy to compress the aligned model into a lightweight student network, jointly distilling intermediate features and semantic-informed soft labels from the teacher model. Extensive experiments on the Widar3.0 benchmark show that GLSDA consistently outperforms state-of-the-art methods in both in-domain and cross-domain gesture recognition tasks, while significantly reducing model size and inference latency. Our method offers a scalable and deployable solution for generalized RF-based gesture interfaces in real-world AIoT applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13375v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13375v1" target="_blank" rel="noopener noreferrer">
                DepthVLA：通过深度感知空间推理增强视觉-语言-动作模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DepthVLA: Enhancing Vision-Language-Action Models with Depth-Aware Spatial Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianyuan Yuan, Yicheng Liu, Chenhao Lu, Zhuoguang Chen, Tao Jiang, Hang Zhao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉-语言-动作模型中的深度感知和空间推理，属于机器人学和具身AI领域。虽然提到了多模态建模概念，但其核心应用方向（机器人控制、物理交互）与推荐系统、搜索或广告的典型应用场景关联度很低，难以找到直接的应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T10:09:00">2025-10-15 10:09:00</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13375v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13375v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language-Action (VLA) models have recently shown impressive generalization and language-guided manipulation capabilities. However, their performance degrades on tasks requiring precise spatial reasoning due to limited spatial reasoning inherited from Vision-Language Models (VLMs). Existing VLAs rely on extensive action-data pretraining to ground VLMs in 3D space, which reduces training efficiency and is still insufficient for accurate spatial understanding. In this work, we present DepthVLA, a simple yet effective VLA architecture that explicitly incorporates spatial awareness through a pretrained depth prediction module. DepthVLA adopts a mixture-of-transformers design that unifies a VLM, a depth transformer, and an action expert with fully shared attentions, forming an end-to-end model with enhanced spatial reasoning. Extensive evaluations in both real-world and simulated environments show that DepthVLA outperforms state-of-the-art approaches, achieving 78.5% vs. 65.0% progress in real-world tasks, 94.9% vs. 93.6% in the LIBERO simulator, and 74.8% vs. 58.8% in the Simpler simulator. Our code will be made publicly available.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13364v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13364v1" target="_blank" rel="noopener noreferrer">
                语言作为标签：数据稀缺下日常姿态的零样本多模态分类
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Language as a Label: Zero-Shot Multimodal Classification of Everyday Postures under Data Scarcity
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>MingZe Tang, Jubal Chandy Jacob
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注多模态分类和姿态识别，属于计算机视觉领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然提到了多模态和零样本学习，但这些技术在当前论文中的应用场景（日常姿态分类）与RecSys/Search/Ads领域缺乏直接联系，潜在应用价值有限。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:53:46">2025-10-15 09:53:46</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13364v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13364v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Recent Vision-Language Models (VLMs) enable zero-shot classification by aligning images and text in a shared space, a promising approach for data-scarce conditions. However, the influence of prompt design on recognizing visually similar categories, such as human postures, is not well understood. This study investigates how prompt specificity affects the zero-shot classification of sitting, standing, and walking/running on a small, 285-image COCO-derived dataset. A suite of modern VLMs, including OpenCLIP, MetaCLIP 2, and SigLip, were evaluated using a three-tiered prompt design that systematically increases linguistic detail. Our findings reveal a compelling, counter-intuitive trend: for the highest-performing models (MetaCLIP 2 and OpenCLIP), the simplest, most basic prompts consistently achieve the best results. Adding descriptive detail significantly degrades performance for instance, MetaCLIP 2's multi-class accuracy drops from 68.8% to 55.1% a phenomenon we term "prompt overfitting". Conversely, the lower-performing SigLip model shows improved classification on ambiguous classes when given more descriptive, body-cue-based prompts.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13315v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13315v1" target="_blank" rel="noopener noreferrer">
                自增强视觉对比解码
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Self-Augmented Visual Contrastive Decoding
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Eun Woo Im, Muhammad Kashif Ali, Vivek Gupta
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉领域的解码技术，属于纯粹的视觉研究方向。虽然标题提到'对比解码'可能涉及一些通用技术，但没有明确证据表明该技术有在推荐系统、搜索或广告领域的潜在应用。该工作更偏向于计算机视觉而非多模态或异质数据建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:03:34">2025-10-15 09:03:34</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13315v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13315v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large Vision-Language Models (LVLMs) have demonstrated remarkable multimodal capabilities, but they inherit the tendency to hallucinate from their underlying language models. While visual contrastive decoding has been proposed to mitigate this issue, existing methods often apply generic visual augmentations that disregard the specific context provided by the text query, limiting their effectiveness. This study introduces a novel training-free decoding strategy that addresses these limitations, featuring two key contributions. First, a self-augmentation prompting strategy that leverages the intrinsic knowledge of the model to dynamically align semantics between the query and the visual augmentation. Second, an adaptive thresholding algorithm that adaptively adjusts next token candidate size based on the output sparsity, utilizing full information from the logit distribution. Extensive experiments across four LVLMs and seven benchmarks demonstrate that the proposed decoding significantly enhances factual consistency compared to state-of-the-art decoding methods. This work highlights the importance of integrating query-dependent augmentation and entropy-aware decoding for improving effective generation of LVLMs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13303v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13303v1" target="_blank" rel="noopener noreferrer">
                基于DBNET++和BART模型的政府机构自动化文档处理系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Automated document processing system for government agencies using DBNET++ and BART models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Aya Kaysan Bahjat
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注政府文档处理的特定应用场景，虽然使用了BART模型，但其应用领域与搜索、推荐或广告系统无关。文档处理系统属于通用NLP应用范畴，没有展示在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T08:48:02">2025-10-15 08:48:02</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13303v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13303v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                An automatic document classification system is presented that detects textual content in images and classifies documents into four predefined categories (Invoice, Report, Letter, and Form). The system supports both offline images (e.g., files on flash drives, HDDs, microSD) and real-time capture via connected cameras, and is designed to mitigate practical challenges such as variable illumination, arbitrary orientation, curved or partially occluded text, low resolution, and distant text. The pipeline comprises four stages: image capture and preprocessing, text detection [1] using a DBNet++ (Differentiable Binarization Network Plus) detector, and text classification [2] using a BART (Bidirectional and Auto-Regressive Transformers) classifier, all integrated within a user interface implemented in Python with PyQt5. The achieved results by the system for text detection in images were good at about 92.88% through 10 hours on Total-Text dataset that involve high resolution images simulate a various and very difficult challenges. The results indicate the proposed approach is effective for practical, mixed-source document categorization in unconstrained imaging scenarios.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13282v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13282v1" target="_blank" rel="noopener noreferrer">
                基于掩码退化分类的通用图像修复预训练
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Universal Image Restoration Pre-training via Masked Degradation Classification
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>JiaKui Hu, Zhengjian Yao, Lujia Jin, Yinghao Chen, Yanye Lu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像修复技术，虽然涉及预训练方法，但其核心应用场景是图像处理而非推荐系统、搜索或广告。掩码退化分类方法可能对视觉内容理解有一定启发，但缺乏明确的RecSys/Search/Ads应用连接，与当前关注的核心领域相关性较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T08:30:15">2025-10-15 08:30:15</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13282v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13282v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This study introduces a Masked Degradation Classification Pre-Training method (MaskDCPT), designed to facilitate the classification of degradation types in input images, leading to comprehensive image restoration pre-training. Unlike conventional pre-training methods, MaskDCPT uses the degradation type of the image as an extremely weak supervision, while simultaneously leveraging the image reconstruction to enhance performance and robustness. MaskDCPT includes an encoder and two decoders: the encoder extracts features from the masked low-quality input image. The classification decoder uses these features to identify the degradation type, whereas the reconstruction decoder aims to reconstruct a corresponding high-quality image. This design allows the pre-training to benefit from both masked image modeling and contrastive learning, resulting in a generalized representation suited for restoration tasks. Benefit from the straightforward yet potent MaskDCPT, the pre-trained encoder can be used to address universal image restoration and achieve outstanding performance. Implementing MaskDCPT significantly improves performance for both convolution neural networks (CNNs) and Transformers, with a minimum increase in PSNR of 3.77 dB in the 5D all-in-one restoration task and a 34.8% reduction in PIQE compared to baseline in real-world degradation scenarios. It also emergences strong generalization to previously unseen degradation types and levels. In addition, we curate and release the UIR-2.5M dataset, which includes 2.5 million paired restoration samples across 19 degradation types and over 200 degradation levels, incorporating both synthetic and real-world data. The dataset, source code, and models are available at https://github.com/MILab-PKU/MaskDCPT.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: Default koroFileHeader placeholder; set `customMade` and open the koroFileHeader configuration to customize it: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card (arXiv:2510.13237v1): the page stylesheet hides .paper-details while the card has the collapsed-level-1 class. -->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Header: translated title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13237v1" target="_blank" rel="noopener noreferrer">
                面向视觉-语言-动作模型的模型无关对抗攻击与防御
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the switch from the zh-CN page language. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Model-agnostic Adversarial Attack and Defense for Vision-Language-Action Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Haochuan Xu, Yun Sing Koh, Shuhuai Huang, Zirun Zhou, Di Wang, Jun Sakuma, Jingf...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉-语言-动作模型的对抗攻击与防御，属于机器人控制和具身智能领域，与推荐系统、搜索或广告的核心技术关联性较弱。虽然标题提及'视觉-语言'，但其核心是动作控制和安全防御，缺乏在RecSys/Search/Ads领域的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value has no UTC offset because the displayed timestamp shows none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:42:44">2025-10-15 07:42:44</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13237v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13237v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Full abstract behind a native details/summary disclosure. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Vision-Language-Action (VLA) models have achieved revolutionary progress in robot learning, enabling robots to execute complex physical robot tasks from natural language instructions. Despite this progress, their adversarial robustness remains underexplored. In this work, we propose both adversarial patch attack and corresponding defense strategies for VLA models. We first introduce the Embedding Disruption Patch Attack (EDPA), a model-agnostic adversarial attack that generates patches directly placeable within the camera's view. In comparison to prior methods, EDPA can be readily applied to different VLA models without requiring prior knowledge of the model architecture, or the controlled robotic manipulator. EDPA constructs these patches by (i) disrupting the semantic alignment between visual and textual latent representations, and (ii) maximizing the discrepancy of latent representations between adversarial and corresponding clean visual inputs. Through the optimization of these objectives, EDPA distorts the VLA's interpretation of visual information, causing the model to repeatedly generate incorrect actions and ultimately result in failure to complete the given robotic task. To counter this, we propose an adversarial fine-tuning scheme for the visual encoder, in which the encoder is optimized to produce similar latent representations for both clean and adversarially perturbed visual inputs. Extensive evaluations on the widely recognized LIBERO robotic simulation benchmark demonstrate that EDPA substantially increases the task failure rate of cutting-edge VLA models, while our proposed defense effectively mitigates this degradation. The codebase is accessible via the homepage at https://edpa-attack.github.io/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13232v1" target="_blank" rel="noopener noreferrer">
                通过结构化推理与令牌合并实现否定感知的视觉语言模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            What "Not" to Detect: Negation-Aware VLMs via Structured Reasoning and Token Merging
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Inha Kang, Youngsun Lim, Seonho Lee, Jiho Choi, Junsuk Choe, Hyunjung Shim
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注视觉语言模型(VLMs)中的否定检测问题，属于纯粹的视觉-语言交叉领域研究。虽然提到了结构化推理和令牌合并技术，但这些技术改进主要针对视觉理解任务，在推荐系统、搜索或广告领域的直接应用潜力非常有限。论文核心关注的是视觉语言理解中的特定语义问题，而非能够直接应用于异构数据处理或推荐系统架构的技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T07:36:38">2025-10-15 07:36:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13232v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13232v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                State-of-the-art vision-language models (VLMs) suffer from a critical failure in understanding negation, often referred to as affirmative bias. This limitation is particularly severe in described object detection (DOD) tasks. To address this, we propose two primary contributions: (1) a new dataset pipeline and (2) a novel, lightweight adaptation recipe. First, we introduce CoVAND, a dataset constructed with a systematic chain-of-thought (CoT) and VQA-based pipeline to generate high-quality, instance-grounded negation data. Second, we propose NegToMe, a novel text token merging module that directly tackles the architectural cause of affirmative bias. NegToMe fundamentally addresses the structural loss of negation cues in tokenization, grouping them with attributes into coherent semantic phrases. It maintains correct polarity at the input level, enabling robust negation understanding even with limited data. For instance, to prevent a model from treating the fragmented tokens "not" and "girl" as simply "girl", NegToMe binds them into a single token whose meaning is correctly distinguished from that of "girl" alone. This module is integrated with a parameter-efficient and strategic LoRA fine-tuning approach. Our method significantly improves performance on challenging negation benchmarks with a lowered false positive rate, boosting NMS-AP by up to +10.8 points on OVDEval and demonstrating generalization to SoTA VLMs. This work marks a crucial step forward in addressing negation understanding for real-world detection applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13198v1" target="_blank" rel="noopener noreferrer">
                基于多层级表征融合的互补信息引导占用预测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Complementary Information Guided Occupancy Prediction via Multi-Level Representation Fusion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Rongtao Xu, Jinzhou Lin, Jialei Zhou, Jiahua Dong, Changwei Wang, Ruisheng Wang,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文主要关注计算机视觉领域的占用预测任务，涉及多层级表征融合技术。虽然多模态融合技术对推荐系统中的异构数据处理有一定启发，但论文标题明确指向视觉占用预测这一特定应用领域，与搜索、推荐、广告系统的核心需求关联度较低。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:37:33">2025-10-15 06:37:33</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13198v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13198v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract.
             The unbalanced LaTeX "\textbf{" markers from the arXiv source have been removed. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Camera-based occupancy prediction is a mainstream approach for 3D perception in autonomous driving, aiming to infer complete 3D scene geometry and semantics from 2D images. Almost existing methods focus on improving performance through structural modifications, such as lightweight backbones and complex cascaded frameworks, with good yet limited performance. Few studies explore from the perspective of representation fusion, leaving the rich diversity of features in 2D images underutilized. Motivated by this, we propose CIGOcc, a two-stage occupancy prediction framework based on multi-level representation fusion. CIGOcc extracts segmentation, graphics, and depth features from an input image and introduces a deformable multi-level fusion mechanism to fuse these three multi-level features. Additionally, CIGOcc incorporates knowledge distilled from SAM to further enhance prediction accuracy. Without increasing training costs, CIGOcc achieves state-of-the-art performance on the SemanticKITTI benchmark. The code is provided in the supplementary material and will be released https://github.com/VitaLemonTea1/CIGOcc
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13108v1" target="_blank" rel="noopener noreferrer">
                DriveCritic：基于视觉语言模型实现自动驾驶的上下文感知、人类对齐评估
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DriveCritic: Towards Context-Aware, Human-Aligned Evaluation for Autonomous Driving with Vision-Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jingyu Song, Zhenxin Li, Shiyi Lan, Xinglong Sun, Nadine Chang, Maying Shen, Jos...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于自动驾驶领域的视觉语言模型应用，属于特定领域应用而非通用推荐系统、搜索或广告技术。虽然提到了上下文感知和人类对齐的概念，但这些概念在自动驾驶中的实现方式与推荐/搜索/广告领域的异构数据处理有本质区别，缺乏直接的技术迁移潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T03:00:38">2025-10-15 03:00:38</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13108v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13108v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.RO</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Benchmarking autonomous driving planners to align with human judgment remains a critical challenge, as state-of-the-art metrics like the Extended Predictive Driver Model Score (EPDMS) lack context awareness in nuanced scenarios. To address this, we introduce DriveCritic, a novel framework featuring two key contributions: the DriveCritic dataset, a curated collection of challenging scenarios where context is critical for correct judgment and annotated with pairwise human preferences, and the DriveCritic model, a Vision-Language Model (VLM) based evaluator. Fine-tuned using a two-stage supervised and reinforcement learning pipeline, the DriveCritic model learns to adjudicate between trajectory pairs by integrating visual and symbolic context. Experiments show DriveCritic significantly outperforms existing metrics and baselines in matching human preferences and demonstrates strong context awareness. Overall, our work provides a more reliable, human-aligned foundation to evaluating autonomous driving systems.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-1">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13063v1" target="_blank" rel="noopener noreferrer">
                真正的自监督新视角合成具有可迁移性
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>2/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            True Self-Supervised Novel View Synthesis is Transferable
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Thomas W. Mitchel, Hyunwoo Ryu, Vincent Sitzmann
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文主要关注计算机视觉中的新视角合成任务，属于纯粹的视觉生成领域。虽然自监督学习技术本身具有通用性，但该论文的特定应用（新视角合成）与推荐系统、搜索或广告的关联性非常有限，没有明确的跨模态应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T01:09:56">2025-10-15 01:09:56</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13063v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13063v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we identify that the key criterion for determining whether a model is truly capable of novel view synthesis (NVS) is transferability: Whether any pose representation extracted from one video sequence can be used to re-render the same camera trajectory in another. We analyze prior work on self-supervised NVS and find that their predicted poses do not transfer: The same set of poses lead to different camera trajectories in different 3D scenes. Here, we present XFactor, the first geometry-free self-supervised model capable of true NVS. XFactor combines pair-wise pose estimation with a simple augmentation scheme of the inputs and outputs that jointly enables disentangling camera pose from scene content and facilitates geometric reasoning. Remarkably, we show that XFactor achieves transferability with unconstrained latent pose variables, without any 3D inductive biases or concepts from multi-view geometry -- such as an explicit parameterization of poses as elements of SE(3). We introduce a new metric to quantify transferability, and through large-scale experiments, we demonstrate that XFactor significantly outperforms prior pose-free NVS transformers, and show that latent poses are highly correlated with real-world poses through probing experiments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13744v1" target="_blank" rel="noopener noreferrer">
                Hard2Verify：面向开放式前沿数学问题的步骤级验证基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Hard2Verify: A Step-Level Verification Benchmark for Open-Ended Frontier Math
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shrey Pandit, Austin Xu, Xuan-Phi Nguyen, Yifei Ming, Caiming Xiong, Shafiq Joty
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于数学问题验证基准，属于纯粹的评估基准研究。虽然涉及验证技术，但完全限定在数学领域，与推荐系统、搜索或广告没有任何潜在应用关联。这属于纯粹的NLP评估基准范畴，属于明确的无关主题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:50:54">2025-10-15 16:50:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13744v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13744v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Large language model (LLM)-based reasoning systems have recently achieved gold medal-level performance in the IMO 2025 competition, writing mathematical proofs where, to receive full credit, each step must be not only correct but also sufficiently supported. To train LLM-based reasoners in such challenging, open-ended settings, strong verifiers capable of catching step-level mistakes are necessary prerequisites. We introduce Hard2Verify, a human-annotated, step-level verification benchmark produced with over 500 hours of human labor. Hard2Verify is designed to rigorously assess step-level verifiers at the frontier: Verifiers must provide step-level annotations or identify the first error in responses generated by frontier LLMs for very recent, challenging, and open-ended math questions. We evaluate 29 generative critics and process reward models, demonstrating that, beyond a few standouts, open-source verifiers lag closed source models. We subsequently analyze what drives poor performance in step-level verification, the impacts of scaling verifier compute, as well as fundamental questions such as self-verification and verification-generation dynamics.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13734v1" target="_blank" rel="noopener noreferrer">
                GAPS：基于临床基础的自动化基准，用于评估AI临床医生
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            GAPS: A Clinically Grounded, Automated Benchmark for Evaluating AI Clinicians
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiuyuan Chen, Tao Sun, Dexin Su, Ailing Yu, Junwei Liu, Zhe Chen, Gangzeng Jin, ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医疗领域AI临床医生的评估基准，属于明确的医学应用范畴。根据筛选规则，医疗、生物学等特定领域应用属于不相关主题，且论文内容与推荐系统、搜索、广告或相关使能技术无任何关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:40:28">2025-10-15 16:40:28</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13734v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13734v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract.
             The LaTeX "\textbf{X}" initials are rendered with <b> so the GAPS acronym stays highlighted. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Current benchmarks for AI clinician systems, often based on multiple-choice exams or manual rubrics, fail to capture the depth, robustness, and safety required for real-world clinical practice. To address this, we introduce the GAPS framework, a multidimensional paradigm for evaluating <b>G</b>rounding (cognitive depth), <b>A</b>dequacy (answer completeness), <b>P</b>erturbation (robustness), and <b>S</b>afety. Critically, we developed a fully automated, guideline-anchored pipeline to construct a GAPS-aligned benchmark end-to-end, overcoming the scalability and subjectivity limitations of prior work. Our pipeline assembles an evidence neighborhood, creates dual graph and tree representations, and automatically generates questions across G-levels. Rubrics are synthesized by a DeepResearch agent that mimics GRADE-consistent, PICO-driven evidence review in a ReAct loop. Scoring is performed by an ensemble of large language model (LLM) judges. Validation confirmed our automated questions are high-quality and align with clinician judgment. Evaluating state-of-the-art models on the benchmark revealed key failure modes: performance degrades sharply with increased reasoning depth (G-axis), models struggle with answer completeness (A-axis), and they are highly vulnerable to adversarial perturbations (P-axis) as well as certain safety issues (S-axis). This automated, clinically-grounded approach provides a reproducible and scalable method for rigorously evaluating AI clinician systems and guiding their development toward safer, more reliable clinical practice.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13681v1" target="_blank" rel="noopener noreferrer">
                采样如何影响机器生成文本的可检测性：一项综合性研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            How Sampling Affects the Detectability of Machine-written texts: A Comprehensive Study
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Matthieu Dubois, François Yvon, Pablo Piantanida
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器生成文本的检测技术，这属于纯粹的NLP评估和检测领域，与推荐系统、搜索或广告的核心技术无关。论文内容涉及文本检测的基准测试和评估方法，属于明确的无关主题范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:36:45">2025-10-15 15:36:45</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13681v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13681v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract.
             The LaTeX "\%" escapes from the arXiv source are rendered as plain percent signs. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As texts generated by Large Language Models (LLMs) are ever more common and often indistinguishable from human-written content, research on automatic text detection has attracted growing attention. Many recent detectors report near-perfect accuracy, often boasting AUROC scores above 99%. However, these claims typically assume fixed generation settings, leaving open the question of how robust such systems are to changes in decoding strategies. In this work, we systematically examine how sampling-based decoding impacts detectability, with a focus on how subtle variations in a model's (sub)word-level distribution affect detection performance. We find that even minor adjustments to decoding parameters - such as temperature, top-p, or nucleus sampling - can severely impair detector accuracy, with AUROC dropping from near-perfect levels to 1% in some settings. Our findings expose critical blind spots in current detection methods and emphasize the need for more comprehensive evaluation protocols. To facilitate future research, we release a large-scale dataset encompassing 37 decoding configurations, along with our code and evaluation framework https://github.com/BaggerOfWords/Sampling-and-Detection
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13598v1" target="_blank" rel="noopener noreferrer">
                FreshTab：为表格到文本生成评估获取新鲜数据
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Paper details: original title, authors, recommendation rationale, metadata, abstract. -->
    <div class="paper-details">
        <!-- English content is marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FreshTab: Sourcing Fresh Data for Table-to-Text Generation Evaluation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kristýna Onderková, Ondřej Plátek, Zdeněk Kasner, Ondřej Dušek
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于表格到文本生成的评估数据获取，这属于纯粹的文本生成和评估领域，与推荐系统、搜索或广告的核心技术无关。论文标题表明其关注的是内容生成和评估基准，这些主题已被明确列为不相关内容，没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:31:44">2025-10-15 14:31:44</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13598v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13598v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract.
             The LaTeX tie "~" from the arXiv source is rendered as a normal space. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Table-to-text generation (insight generation from tables) is a challenging task that requires precision in analyzing the data. In addition, the evaluation of existing benchmarks is affected by contamination of Large Language Model (LLM) training data as well as domain imbalance. We introduce FreshTab, an on-the-fly table-to-text benchmark generation from Wikipedia, to combat the LLM data contamination problem and enable domain-sensitive evaluation. While non-English table-to-text datasets are limited, FreshTab collects datasets in different languages on demand (we experiment with German, Russian and French in addition to English). We find that insights generated by LLMs from recent tables collected by our method appear clearly worse by automatic metrics, but this does not translate into LLM and human evaluations. Domain effects are visible in all evaluations, showing that a domain-balanced benchmark is more challenging.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13500v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13500v1" target="_blank" rel="noopener noreferrer">
                MedREK：基于检索的医学大语言模型编辑方法及关键感知提示
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MedREK: Retrieval-Based Editing for Medical LLMs with Key-Aware Prompts
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Shujun Xia, Haokun Lin, Yichen Wu, Yinan Zhou, Zixuan Li, Zhongwei Wan, Xingrun ...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学领域的LLM编辑技术，属于明确的医学领域应用，属于用户指定的不相关主题。论文标题明确提及医学应用，与RecSys、搜索或广告领域无直接关联，检索编辑技术也未显示出在这些领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T12:50:33">2025-10-15 12:50:33</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13500v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13500v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                LLMs hold great promise for healthcare applications, but the rapid evolution of medical knowledge and errors in training data often cause them to generate outdated or inaccurate information, limiting their applicability in high-stakes clinical practice. Model editing has emerged as a potential remedy without full retraining. While parameter-based editing often compromises locality and is thus ill-suited for the medical domain, retrieval-based editing offers a more viable alternative. However, it still faces two critical challenges: (1) representation overlap within the medical knowledge space often causes inaccurate retrieval and reduces editing accuracy; (2) existing methods are restricted to single-sample edits, while batch-editing remains largely unexplored despite its importance for real-world medical applications. To address these challenges, we first construct MedVersa, an enhanced benchmark with broader coverage of medical subjects, designed to evaluate both single and batch edits under strict locality constraints. We then propose MedREK, a retrieval-based editing framework that integrates a shared query-key module for precise matching with an attention-based prompt encoder for informative guidance. Experimental results on various medical benchmarks demonstrate that our MedREK achieves superior performance across different core metrics and provides the first validated solution for batch-editing in medical LLMs. Our code and dataset are available at https://github.com/mylittleriver/MedREK.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13430v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13430v1" target="_blank" rel="noopener noreferrer">
                阿拉伯语大语言模型评估：基准、方法与差距综述
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Evaluating Arabic Large Language Models: A Survey of Benchmarks, Methods, and Gaps
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ahmed Alzubaidi, Shaikha Alsuwaidi, Basma El Amel Boussaha, Leen AlQadi, Omar Al...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于阿拉伯语LLM的评估基准和方法，属于纯粹的评估基准和NLP中心主题，与我的关注点无关。论文没有涉及推荐系统、搜索或广告的核心进展，也没有讨论可能应用于这些领域的使能技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T11:25:33">2025-10-15 11:25:33</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13430v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13430v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This survey provides the first systematic review of Arabic LLM benchmarks, analyzing 40+ evaluation benchmarks across NLP tasks, knowledge domains, cultural understanding, and specialized capabilities. We propose a taxonomy organizing benchmarks into four categories: Knowledge, NLP Tasks, Culture and Dialects, and Target-Specific evaluations. Our analysis reveals significant progress in benchmark diversity while identifying critical gaps: limited temporal evaluation, insufficient multi-turn dialogue assessment, and cultural misalignment in translated datasets. We examine three primary approaches: native collection, translation, and synthetic generation discussing their trade-offs regarding authenticity, scale, and cost. This work serves as a comprehensive reference for Arabic NLP researchers, providing insights into benchmark methodologies, reproducibility standards, and evaluation metrics while offering recommendations for future development.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13417v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13417v1" target="_blank" rel="noopener noreferrer">
                通过气候论述中的隐式因果链发现评估大语言模型推理能力
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Assessing LLM Reasoning Through Implicit Causal Chain Discovery in Climate Discourse
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Liesbeth Allein, Nataly Pineda-Castañeda, Andrea Rocci, Marie-Francine Moens
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于评估LLM在特定领域（气候论述）中的推理能力，这属于纯粹的NLP评估基准研究。虽然涉及LLM推理，但论文关注的是气候领域的因果链发现，没有展示在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T11:15:00">2025-10-15 11:15:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13417v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13417v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                How does a cause lead to an effect, and which intermediate causal steps explain their connection? This work scrutinizes the mechanistic causal reasoning capabilities of large language models (LLMs) to answer these questions through the task of implicit causal chain discovery. In a diagnostic evaluation framework, we instruct nine LLMs to generate all possible intermediate causal steps linking given cause-effect pairs in causal chain structures. These pairs are drawn from recent resources in argumentation studies featuring polarized discussion on climate change. Our analysis reveals that LLMs vary in the number and granularity of causal steps they produce. Although they are generally self-consistent and confident about the intermediate causal connections in the generated chains, their judgments are mainly driven by associative pattern matching rather than genuine causal reasoning. Nonetheless, human evaluations confirmed the logical coherence and integrity of the generated chains. Our baseline causal chain discovery approach, insights from our diagnostic evaluation, and benchmark dataset with causal chains lay a solid foundation for advancing future work in implicit, mechanistic causal reasoning in argumentation settings.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13357v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13357v1" target="_blank" rel="noopener noreferrer">
                联邦语音模型中的个人属性泄露
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Personal Attribute Leakage in Federated Speech Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hamdan Al-Ali, Ali Reza Ghavamipour, Tommaso Caselli, Fatih Turkmen, Zeerak Tala...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文涉及联邦学习和隐私泄露问题，这属于明确排除的无关主题。虽然标题提到语音模型，但核心关注点是隐私和安全方面，与推荐系统、搜索或广告的核心技术进展没有任何关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T09:43:10">2025-10-15 09:43:10</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13357v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13357v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Federated learning is a common method for privacy-preserving training of machine learning models. In this paper, we analyze the vulnerability of ASR models to attribute inference attacks in the federated setting. We test a non-parametric white-box attack method under a passive threat model on three ASR models: Wav2Vec2, HuBERT, and Whisper. The attack operates solely on weight differentials without access to raw speech from target speakers. We demonstrate attack feasibility on sensitive demographic and clinical attributes: gender, age, accent, emotion, and dysarthria. Our findings indicate that attributes that are underrepresented or absent in the pre-training data are more vulnerable to such inference attacks. In particular, information about accents can be reliably inferred from all models. Our findings expose previously undocumented vulnerabilities in federated ASR models and offer insights towards improved security.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13341v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13341v1" target="_blank" rel="noopener noreferrer">
                谚语是新的皮媞亚神谕吗？探索希腊谚语中的情感
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Are Proverbs the New Pythian Oracles? Exploring Sentiment in Greek Sayings
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Katerina Korre, John Pavlopoulos
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于希腊谚语的情感分析，属于语言学和文化研究领域，与推荐系统、搜索或广告的核心技术进展完全无关。论文内容不涉及任何LLM技术、Transformer架构改进或异构数据建模，也没有任何潜在的应用于RecSys/Search/Ads的技术路径。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T09:26:52">2025-10-15 09:26:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13341v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13341v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Proverbs are among the most fascinating linguistic phenomena that transcend cultural and linguistic boundaries. Yet, much of the global landscape of proverbs remains underexplored, as many cultures preserve their traditional wisdom within their own communities due to the oral tradition of the phenomenon. Taking advantage of the current advances in Natural Language Processing (NLP), we focus on Greek proverbs, analyzing their sentiment. Departing from an annotated dataset of Greek proverbs, we expand it to include local dialects, effectively mapping the annotated sentiment. We present (1) a way to exploit LLMs in order to perform sentiment classification of proverbs, (2) a map of Greece that provides an overview of the distribution of sentiment, (3) a combinatory analysis in terms of the geographic position, dialect, and topic of proverbs. Our findings show that LLMs can provide us with an accurate enough picture of the sentiment of proverbs, especially when approached as a non-conventional sentiment polarity task. Moreover, in most areas of Greece negative sentiment is more prevalent.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13293v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13293v1" target="_blank" rel="noopener noreferrer">
                面向自回归TTS模型的鲁棒情感控制的不匹配感知引导
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Mismatch Aware Guidance for Robust Emotion Control in Auto-Regressive TTS Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yizhou Peng, Yukun Ma, Chong Zhang, Yi-Wen Chao, Chongjia Ni, Bin Ma
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本转语音（TTS）模型中的情感控制，属于语音生成领域，与推荐系统、搜索或广告的核心技术无直接关联。尽管涉及自回归模型，但其应用场景（TTS情感控制）与RecSys/Search/Ads的排名、检索或用户行为建模需求不匹配，且未提及任何潜在的跨领域应用。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T08:37:16">2025-10-15 08:37:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13293v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13293v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                While Text-to-Speech (TTS) systems can achieve fine-grained control over emotional expression via natural language prompts, a significant challenge emerges when the desired emotion (style prompt) conflicts with the semantic content of the text. This mismatch often results in unnatural-sounding speech, undermining the goal of achieving fine-grained emotional control. Classifier-Free Guidance (CFG) is a key technique for enhancing prompt alignment; however, its application to auto-regressive (AR) TTS models remains underexplored, which can lead to degraded audio quality. This paper directly addresses the challenge of style-content mismatch in AR TTS models by proposing an adaptive CFG scheme that adjusts to different levels of the detected mismatch, as measured using large language models or natural language inference models. This solution is based on a comprehensive analysis of CFG's impact on emotional expressiveness in state-of-the-art AR TTS models. Our results demonstrate that the proposed adaptive CFG scheme improves the emotional expressiveness of the AR TTS model while maintaining audio quality and intelligibility.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13281v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13281v1" target="_blank" rel="noopener noreferrer">
                双头胜于单头：基于双重假设的视听语音错误纠正
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Two Heads Are Better Than One: Audio-Visual Speech Error Correction with Dual Hypotheses
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sungnyun Kim, Kangwook Jang, Sungwoo Cho, Joon Son Chung, Hoirin Kim, Se-Young Y...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于音频-视觉语音错误纠正，属于多模态语音处理领域，与推荐系统、搜索或广告的核心技术无直接关联。虽然涉及多模态融合，但其应用场景（语音错误纠正）和核心技术（视听语音处理）与我的关注领域相距甚远，无法看出在RecSys/Search/Ads中的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T08:27:16">2025-10-15 08:27:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13281v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13281v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">eess.AS</span><span class="category-tag">cs.CL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper introduces a new paradigm for generative error correction (GER) framework in audio-visual speech recognition (AVSR) that reasons over modality-specific evidences directly in the language space. Our framework, DualHyp, empowers a large language model (LLM) to compose independent N-best hypotheses from separate automatic speech recognition (ASR) and visual speech recognition (VSR) models. To maximize the effectiveness of DualHyp, we further introduce RelPrompt, a noise-aware guidance mechanism that provides modality-grounded prompts to the LLM. RelPrompt offers the temporal reliability of each modality stream, guiding the model to dynamically switch its focus between ASR and VSR hypotheses for an accurate correction. Under various corruption scenarios, our framework attains up to 57.7% error rate gain on the LRS2 benchmark over standard ASR baseline, contrary to single-stream GER approaches that achieve only 10% gain. To facilitate research within our DualHyp framework, we release the code and the dataset comprising ASR and VSR hypotheses at https://github.com/sungnyun/dualhyp.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13194v1. The collapsed-level-2 class hides the
         whole card via the page's inline styles (display: none !important). -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13194v1" target="_blank" rel="noopener noreferrer">
                StressTransfer：具有重音保持功能的压力感知语音到语音翻译
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icon is decorative; the score text carries the meaning -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            StressTransfer: Stress-Aware Speech-to-Speech Translation with Emphasis Preservation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xi Chen, Yuchen Song, Satoshi Nakamura
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语音到语音翻译中的重音处理，属于纯粹的语音处理领域，与搜索、推荐或广告系统没有明显关联。即使考虑其作为使能技术的潜力，语音重音处理在RecSys/Search/Ads应用场景中缺乏明确的价值主张和实际应用路径。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. The datetime value
             carries no UTC offset because the source text does not state one. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> <time datetime="2025-10-15T06:32:24">2025-10-15 06:32:24</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13194v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13194v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget for the full English abstract -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We propose a stress-aware speech-to-speech translation (S2ST) system that preserves word-level emphasis by leveraging LLMs for cross-lingual emphasis conversion. Our method translates source-language stress into target-language tags that guide a controllable TTS model. To overcome data scarcity, we developed a pipeline to automatically generate aligned training data and introduce the "LLM-as-Judge" for evaluation. Experiments show our approach substantially outperforms baselines in preserving emphasis while maintaining comparable translation quality, speaker intent, and naturalness. Our work highlights the importance of prosody in translation and provides an effective, data-efficient solution for preserving paralinguistic cues in S2ST.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13115v1" target="_blank" rel="noopener noreferrer">
                多标签临床文本资格分类与摘要系统
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Multi-Label Clinical Text Eligibility Classification and Summarization System
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Surya Tejaswi Yerramsetty, Almas Fathimah
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医疗领域的临床文本处理，属于明确的医疗领域特定应用，与推荐系统、搜索或广告的核心技术无关。虽然涉及文本分类和摘要技术，但这些技术在医疗场景中的应用不符合当前关注的任何技术方向。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 03:21:43
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13115v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13115v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CL</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Clinical trials are central to medical progress because they help improve understanding of human health and the healthcare system. They play a key role in discovering new ways to detect, prevent, or treat diseases, and it is essential that clinical trials include participants with appropriate and diverse medical backgrounds. In this paper, we propose a system that leverages Natural Language Processing (NLP) and Large Language Models (LLMs) to automate multi-label clinical text eligibility classification and summarization. The system combines feature extraction methods such as word embeddings (Word2Vec) and named entity recognition to identify relevant medical concepts, along with traditional vectorization techniques such as count vectorization and TF-IDF (Term Frequency-Inverse Document Frequency). We further explore weighted TF-IDF word embeddings that integrate both count-based and embedding-based strengths to capture term importance effectively. Multi-label classification using Random Forest and SVM models is applied to categorize documents based on eligibility criteria. Summarization techniques including TextRank, Luhn, and GPT-3 are evaluated to concisely summarize eligibility requirements. Evaluation with ROUGE scores demonstrates the effectiveness of the proposed methods. This system shows potential for automating clinical trial eligibility assessment using data-driven approaches, thereby improving research efficiency.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13106v1" target="_blank" rel="noopener noreferrer">
                TRUSTVIS：面向大语言模型的多维度可信度评估框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            TRUSTVIS: A Multi-Dimensional Trustworthiness Evaluation Framework for Large Language Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruoyu Sun, Da Song, Jiayang Song, Yuheng Huang, Lei Ma
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于大语言模型的可信度评估框架，属于评估基准和可信度研究范畴，这些都被明确列为不相关主题。虽然标题涉及LLMs，但内容方向是评估而非核心技术进步或直接应用，没有展示在推荐系统、搜索或广告领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 02:59:07
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13106v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13106v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.SE</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CL</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                As Large Language Models (LLMs) continue to revolutionize Natural Language Processing (NLP) applications, critical concerns about their trustworthiness persist, particularly in safety and robustness. To address these challenges, we introduce TRUSTVIS, an automated evaluation framework that provides a comprehensive assessment of LLM trustworthiness. A key feature of our framework is its interactive user interface, designed to offer intuitive visualizations of trustworthiness metrics. By integrating well-known perturbation methods like AutoDAN and employing majority voting across various evaluation methods, TRUSTVIS not only provides reliable results but also makes complex evaluation processes accessible to users. Preliminary case studies on models like Vicuna-7b, Llama2-7b, and GPT-3.5 demonstrate the effectiveness of our framework in identifying safety and robustness vulnerabilities, while the interactive interface allows users to explore results in detail, empowering targeted model improvements. Video Link: https://youtu.be/k1TrBqNVg8g
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13809v1" target="_blank" rel="noopener noreferrer">
                PhysMaster：通过强化学习掌握视频生成的物理表征
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            PhysMaster: Mastering Physical Representation for Video Generation via Reinforcement Learning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sihui Ji, Xi Chen, Xin Tao, Pengfei Wan, Hengshuang Zhao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频生成中的物理表征学习，属于纯粹的视觉生成领域，与推荐系统、搜索或广告没有直接关联。虽然使用了强化学习，但应用场景是视频生成而非排名或用户建模，完全超出了相关技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 17:59:59
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13809v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13809v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (LaTeX-style quotes normalized). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Video generation models nowadays are capable of generating visually realistic videos, but often fail to adhere to physical laws, limiting their ability to generate physically plausible videos and serve as "world models". To address this issue, we propose PhysMaster, which captures physical knowledge as a representation for guiding video generation models to enhance their physics-awareness. Specifically, PhysMaster is based on the image-to-video task where the model is expected to predict physically plausible dynamics from the input image. Since the input image provides physical priors like relative positions and potential interactions of objects in the scenario, we devise PhysEncoder to encode physical information from it as an extra condition to inject physical knowledge into the video generation process. The lack of proper supervision on the model's physical performance beyond mere appearance motivates PhysEncoder to apply reinforcement learning with human feedback to physical representation learning, which leverages feedback from generation models to optimize physical representations with Direct Preference Optimization (DPO) in an end-to-end manner. PhysMaster provides a feasible solution for improving physics-awareness of PhysEncoder and thus of video generation, proving its ability on a simple proxy task and generalizability to wide-ranging physical scenarios. This implies that our PhysMaster, which unifies solutions for various physical processes via representation learning in the reinforcement learning paradigm, can act as a generic and plug-in solution for physics-aware video generation and broader applications.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13793v1" target="_blank" rel="noopener noreferrer">
                NoisePrints：用于私有扩散模型中作者身份识别的无失真水印技术
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            NoisePrints: Distortion-Free Watermarks for Authorship in Private Diffusion Models
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Nir Goren, Oren Katzir, Abhinav Nakarmi, Eyal Ronen, Mahmood Sharif, Or Patashni...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于扩散模型中的水印技术，属于内容生成和版权保护领域，与推荐系统、搜索或广告的核心技术无关。虽然提到了私有模型，但这属于隐私保护范畴，属于明确排除的非技术性话题。该技术没有明显的应用场景可以转化为推荐、搜索或广告系统的核心算法改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 17:50:45
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13793v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13793v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.CR</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. The method name that was
             missing after "we propose" is restored from the title and the abstract's last sentence. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                With the rapid adoption of diffusion models for visual content generation, proving authorship and protecting copyright have become critical. This challenge is particularly important when model owners keep their models private and may be unwilling or unable to handle authorship issues, making third-party verification essential. A natural solution is to embed watermarks for later verification. However, existing methods require access to model weights and rely on computationally heavy procedures, rendering them impractical and non-scalable. To address these challenges, we propose NoisePrints, a lightweight watermarking scheme that utilizes the random seed used to initialize the diffusion process as a proof of authorship without modifying the generation process. Our key observation is that the initial noise derived from a seed is highly correlated with the generated visual content. By incorporating a hash function into the noise sampling process, we further ensure that recovering a valid seed from the content is infeasible. We also show that sampling an alternative seed that passes verification is infeasible, and demonstrate the robustness of our method under various manipulations. Finally, we show how to use cryptographic zero-knowledge proofs to prove ownership without revealing the seed. By keeping the seed secret, we increase the difficulty of watermark removal. In our experiments, we validate NoisePrints on multiple state-of-the-art diffusion models for images and videos, demonstrating efficient verification using only the seed and output, without requiring access to model weights.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13787v1" target="_blank" rel="noopener noreferrer">
                基于扩散模型的故事延续中语义一致性的自适应视觉条件控制
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Adaptive Visual Conditioning for Semantic Consistency in Diffusion-Based Story Continuation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Seyed Mohammad Mousavi, Morteza Analoui
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于扩散模型在故事延续中的视觉生成应用，属于纯粹的视觉内容生成领域。虽然提到了语义一致性，但这是针对故事文本到视觉内容的生成任务，与推荐系统、搜索或广告中的排序、检索或用户建模没有直接关联。该技术缺乏在RecSys/Search/Ads领域的潜在应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 17:43:22
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13787v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13787v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Story continuation focuses on generating the next image in a narrative sequence so that it remains coherent with both the ongoing text description and the previously observed images. A central challenge in this setting lies in utilizing prior visual context effectively, while ensuring semantic alignment with the current textual input. In this work, we introduce AVC (Adaptive Visual Conditioning), a framework for diffusion-based story continuation. AVC employs the CLIP model to retrieve the most semantically aligned image from previous frames. Crucially, when no sufficiently relevant image is found, AVC adaptively restricts the influence of prior visuals to only the early stages of the diffusion process. This enables the model to exploit visual context when beneficial, while avoiding the injection of misleading or irrelevant information. Furthermore, we improve data quality by re-captioning a noisy dataset using large language models, thereby strengthening textual supervision and semantic alignment. Quantitative results and human evaluations demonstrate that AVC achieves superior coherence, semantic consistency, and visual fidelity compared to strong baselines, particularly in challenging cases where prior visuals conflict with the current input.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13778v1" target="_blank" rel="noopener noreferrer">
                InternVLA-M1：一种用于通用机器人策略的空间引导视觉-语言-动作框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            InternVLA-M1: A Spatially Guided Vision-Language-Action Framework for Generalist Robot Policy
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinyi Chen, Yilun Chen, Yanwei Fu, Ning Gao, Jiaya Jia, Weiyang Jin, Hao Li, Yao...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于机器人控制领域的视觉-语言-动作框架，属于机器人学特定应用。虽然涉及多模态建模，但其核心是机器人策略学习，与推荐系统、搜索或广告领域没有直接关联。该技术不具备在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 17:30:05
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13778v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13778v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.RO</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (LaTeX-style quotes normalized). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We introduce InternVLA-M1, a unified framework for spatial grounding and robot control that advances instruction-following robots toward scalable, general-purpose intelligence. Its core idea is spatially guided vision-language-action training, where spatial grounding serves as the critical link between instructions and robot actions. InternVLA-M1 employs a two-stage pipeline: (i) spatial grounding pre-training on over 2.3M spatial reasoning data to determine "where to act" by aligning instructions with visual, embodiment-agnostic positions, and (ii) spatially guided action post-training to decide "how to act" by generating embodiment-aware actions through plug-and-play spatial prompting. This spatially guided training recipe yields consistent gains: InternVLA-M1 outperforms its variant without spatial guidance by +14.6% on SimplerEnv Google Robot, +17% on WidowX, and +4.3% on LIBERO Franka, while demonstrating stronger spatial reasoning capability in box, point, and trace prediction. To further scale instruction following, we built a simulation engine to collect 244K generalizable pick-and-place episodes, enabling a 6.2% average improvement across 200 tasks and 3K+ objects. In real-world clustered pick-and-place, InternVLA-M1 improved by 7.3%, and with synthetic co-training, achieved +20.6% on unseen objects and novel configurations. Moreover, in long-horizon reasoning-intensive scenarios, it surpassed existing works by over 10%. These results highlight spatially guided training as a unifying principle for scalable and resilient generalist robots. Code and models are available at https://github.com/InternRobotics/InternVLA-M1.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13759v1" target="_blank" rel="noopener noreferrer">
                Uni-MMMU：一个大规模多学科多模态统一基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Uni-MMMU: A Massive Multi-discipline Multimodal Unified Benchmark
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Kai Zou, Ziqi Huang, Yuhao Dong, Shulin Tian, Dian Zheng, Hongbo Liu, Jingwen He...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这是一个多模态基准测试论文，专注于评估模型在各种学科任务上的表现。虽然涉及多模态，但它属于纯粹的评估基准范畴，与推荐系统、搜索或广告的核心技术进展、Transformer架构改进或直接应用无关。该论文没有展示在RecSys/Search/Ads领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 17:10:35
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13759v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13759v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Unified multimodal models aim to jointly enable visual understanding and generation, yet current benchmarks rarely examine their true integration. Existing evaluations either treat the two abilities in isolation or overlook tasks that inherently couple them. To address this gap, we present Uni-MMMU, a comprehensive and discipline-aware benchmark that systematically unfolds the bidirectional synergy between generation and understanding across eight reasoning-centric domains, including science, coding, mathematics, and puzzles. Each task is bidirectionally coupled, demanding models to (i) leverage conceptual understanding to guide precise visual synthesis, or (ii) utilize generation as a cognitive scaffold for analytical reasoning. Uni-MMMU incorporates verifiable intermediate reasoning steps, unique ground truths, and a reproducible scoring protocol for both textual and visual outputs. Through extensive evaluation of state-of-the-art unified, generation-only, and understanding-only models, we reveal substantial performance disparities and cross-modal dependencies, offering new insights into when and how these abilities reinforce one another, and establishing a reliable foundation for advancing unified models.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card is hidden by the page-level .collapsed-level-2 rule (display: none !important). -->
    <!-- Header row: Chinese title linking to alphaXiv, plus the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13745v1" target="_blank" rel="noopener noreferrer">
                UniCalli：一个用于中文书法列级生成与识别的统一扩散框架
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section: the page-level .collapsed-level-1 .paper-details rule hides it inside such an ancestor. -->
    <div class="paper-details">
        <!-- Original English title; lang marks the language switch inside the zh-CN page. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            UniCalli: A Unified Diffusion Framework for Column-Level Generation and Recognition of Chinese Calligraphy
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Tianshuo Xu, Kai Wang, Zhifei Chen, Leyi Wu, Tianshui Wen, Fei Chao, Ying-Cong C...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于中文书法的生成与识别，属于计算机视觉和艺术生成领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然扩散模型是生成技术，但该应用场景过于特定，无法为RecSys/Search/Ads领域提供可转移的技术见解或应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. Icons and separators are decorative. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i> 2025-10-15 16:52:07
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13745v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13745v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. Raw LaTeX commands
             (\textbf, \href) that rendered literally are replaced by plain text and the bare URL. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Computational replication of Chinese calligraphy remains challenging. Existing methods falter, either creating high-quality isolated characters while ignoring page-level aesthetics like ligatures and spacing, or attempting page synthesis at the expense of calligraphic correctness. We introduce UniCalli, a unified diffusion framework for column-level recognition and generation. Training both tasks jointly is deliberate: recognition constrains the generator to preserve character structure, while generation provides style and layout priors. This synergy fosters concept-level abstractions that improve both tasks, especially in limited-data regimes. We curated a dataset of over 8,000 digitized pieces, with ~4,000 densely annotated. UniCalli employs asymmetric noising and a rasterized box map for spatial priors, trained on a mix of synthetic, labeled, and unlabeled data. The model achieves state-of-the-art generative quality with superior ligature continuity and layout fidelity, alongside stronger recognition. The framework successfully extends to other ancient scripts, including Oracle bone inscriptions and Egyptian hieroglyphs. Code and data can be viewed at https://github.com/EnVision-Research/UniCalli.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13735v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13735v1" target="_blank" rel="noopener noreferrer">
                用于超低场到高场MRI合成的循环自监督扩散方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Cyclic Self-Supervised Diffusion for Ultra Low-field to High-field MRI Synthesis
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhenxuan Zhang, Peiyuan Jing, Zi Wang, Ula Briski, Coraline Beitone, Yue Yang, Y...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学成像领域的MRI合成技术，属于医疗应用范畴，与推荐系统、搜索或广告领域完全无关。论文涉及的自监督扩散方法虽然技术上有创新性，但缺乏在RecSys/Search/Ads领域的潜在应用场景。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:41:54">2025-10-15 16:41:54</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13735v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13735v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Synthesizing high-quality images from low-field MRI holds significant potential. Low-field MRI is cheaper, more accessible, and safer, but suffers from low resolution and poor signal-to-noise ratio. This synthesis process can reduce reliance on costly acquisitions and expand data availability. However, synthesizing high-field MRI still suffers from a clinical fidelity gap. There is a need to preserve anatomical fidelity, enhance fine-grained structural details, and bridge domain gaps in image contrast. To address these issues, we propose a <em>cyclic self-supervised diffusion (CSS-Diff)</em> framework for high-field MRI synthesis from real low-field MRI data. Our core idea is to reformulate diffusion-based synthesis under a cycle-consistent constraint. It enforces anatomical preservation throughout the generative process rather than just relying on paired pixel-level supervision. The CSS-Diff framework further incorporates two novel processes. The slice-wise gap perception network aligns inter-slice inconsistencies via contrastive learning. The local structure correction network enhances local feature restoration through self-reconstruction of masked and perturbed patches. Extensive experiments on cross-field synthesis tasks demonstrate the effectiveness of our method, achieving state-of-the-art performance (e.g., 31.80 ± 2.70 dB in PSNR, 0.943 ± 0.102 in SSIM, and 0.0864 ± 0.0689 in LPIPS). Beyond pixel-wise fidelity, our method also preserves fine-grained anatomical structures compared with the original low-field MRI (e.g., left cerebral white matter error drops from 12.1% to 2.1%, cortex from 4.2% to 3.7%). To conclude, our CSS-Diff can synthesize images that are both quantitatively reliable and anatomically consistent.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13729v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13729v1" target="_blank" rel="noopener noreferrer">
                LiFMCR：光场多相机配准的数据集与基准
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            LiFMCR: Dataset and Benchmark for Light Field Multi-Camera Registration
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Aymeric Fleith, Julian Zirbel, Daniel Cremers, Niclas Zeller
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于光场多相机配准，属于计算机视觉中的特定技术领域，与推荐系统、搜索或广告的核心技术进展没有直接关联。光场相机技术主要应用于3D重建和计算摄影，在当前聚焦的LLM技术、Transformer架构改进或异构数据统一建模方面缺乏明确的潜在应用价值。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:32:27">2025-10-15 16:32:27</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13729v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13729v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present LiFMCR, a novel dataset for the registration of multiple micro lens array (MLA)-based light field cameras. While existing light field datasets are limited to single-camera setups and typically lack external ground truth, LiFMCR provides synchronized image sequences from two high-resolution Raytrix R32 plenoptic cameras, together with high-precision 6-degrees of freedom (DoF) poses recorded by a Vicon motion capture system. This unique combination enables rigorous evaluation of multi-camera light field registration methods. As a baseline, we provide two complementary registration approaches: a robust 3D transformation estimation via a RANSAC-based method using cross-view point clouds, and a plenoptic PnP algorithm estimating extrinsic 6-DoF poses from single light field images. Both explicitly integrate the plenoptic camera model, enabling accurate and scalable multi-camera registration. Experiments show strong alignment with the ground truth, supporting reliable multi-view light field processing. Project page: https://lifmcr.github.io/
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13720v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13720v1" target="_blank" rel="noopener noreferrer">
                Willis环中心线图：数据集与基线算法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Circle of Willis Centerline Graphs: A Dataset and Baseline Algorithm
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Fabio Musio, Norman Juchler, Kaiyuan Yang, Suprosanna Shit, Chinmay Prabhakar, B...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文标题明确涉及医学影像领域（Willis环是大脑中的血管结构），专注于血管中心线提取的数据集和算法。这与我的关注领域（推荐系统、搜索、广告及相关的LLM/Transformer技术）完全无关，属于明确的医学应用范畴。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T16:22:51">2025-10-15 16:22:51</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13720v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13720v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The Circle of Willis (CoW) is a critical network of arteries in the brain, often implicated in cerebrovascular pathologies. Voxel-level segmentation is an important first step toward an automated CoW assessment, but a full quantitative analysis requires centerline representations. However, conventional skeletonization techniques often struggle to extract reliable centerlines due to the CoW's complex geometry, and publicly available centerline datasets remain scarce. To address these challenges, we used a thinning-based skeletonization algorithm to extract and curate centerline graphs and morphometric features from the TopCoW dataset, which includes 200 stroke patients, each imaged with MRA and CTA. The curated graphs were used to develop a baseline algorithm for centerline and feature extraction, combining U-Net-based skeletonization with A* graph connection. Performance was evaluated on a held-out test set, focusing on anatomical accuracy and feature robustness. Further, we used the extracted features to predict the frequency of fetal PCA variants, confirm theoretical bifurcation optimality relations, and detect subtle modality differences. The baseline algorithm consistently reconstructed graph topology with high accuracy (F1 = 1), and the average Euclidean node distance between reference and predicted graphs was below one voxel. Features such as segment radius, length, and bifurcation ratios showed strong robustness, with median relative errors below 5% and Pearson correlations above 0.95. Our results demonstrate the utility of learning-based skeletonization combined with graph connection for anatomically plausible centerline extraction. We emphasize the importance of going beyond simple voxel-based measures by evaluating anatomical accuracy and feature robustness. The dataset and baseline algorithm have been released to support further method development and clinical research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13678v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13678v1" target="_blank" rel="noopener noreferrer">
                FlashWorld：数秒内生成高质量3D场景
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            FlashWorld: High-quality 3D Scene Generation within Seconds
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xinyang Li, Tengfei Wang, Zixiao Gu, Shengchuan Zhang, Chunchao Guo, Liujuan Cao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D场景生成技术，属于计算机图形学领域，与推荐系统、搜索或广告的核心技术栈无直接关联。3D生成技术主要应用于游戏、虚拟现实和数字孪生等场景，无法为推荐排序、用户意图理解或广告投放等核心业务提供技术支撑。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:35:48">2025-10-15 15:35:48</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13678v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13678v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We propose FlashWorld, a generative model that produces 3D scenes from a single image or text prompt in seconds, 10~100× faster than previous works while possessing superior rendering quality. Our approach shifts from the conventional multi-view-oriented (MV-oriented) paradigm, which generates multi-view images for subsequent 3D reconstruction, to a 3D-oriented approach where the model directly produces 3D Gaussian representations during multi-view generation. While ensuring 3D consistency, 3D-oriented method typically suffers poor visual quality. FlashWorld includes a dual-mode pre-training phase followed by a cross-mode post-training phase, effectively integrating the strengths of both paradigms. Specifically, leveraging the prior from a video diffusion model, we first pre-train a dual-mode multi-view diffusion model, which jointly supports MV-oriented and 3D-oriented generation modes. To bridge the quality gap in 3D-oriented generation, we further propose a cross-mode post-training distillation by matching distribution from consistent 3D-oriented mode to high-quality MV-oriented mode. This not only enhances visual quality while maintaining 3D consistency, but also reduces the required denoising steps for inference. Also, we propose a strategy to leverage massive single-view images and text prompts during this process to enhance the model's generalization to out-of-distribution inputs. Extensive experiments demonstrate the superiority and efficiency of our method.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13670v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13670v1" target="_blank" rel="noopener noreferrer">
                NTIRE 2025低光照图像增强挑战赛：方法与结果
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            NTIRE 2025 Challenge on Low Light Image Enhancement: Methods and Results
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Xiaoning Liu, Zongwei Wu, Florin-Alexandru Vasluianu, Hailong Yan, Bin Ren, Yulu...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的低光照图像增强技术，属于纯粹的视觉处理任务。虽然图像质量提升在某些广告或内容场景中有间接价值，但该研究本身不涉及推荐系统、搜索或广告的核心技术，也没有与Transformer架构、LLM技术或异构数据建模的直接关联。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:30:16">2025-10-15 15:30:16</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13670v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13670v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper presents a comprehensive review of the NTIRE 2025 Low-Light Image Enhancement (LLIE) Challenge, highlighting the proposed solutions and final outcomes. The objective of the challenge is to identify effective networks capable of producing brighter, clearer, and visually compelling images under diverse and challenging conditions. A remarkable total of 762 participants registered for the competition, with 28 teams ultimately submitting valid entries. This paper thoroughly evaluates the state-of-the-art advancements in LLIE, showcasing the significant progress.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13669v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13669v1" target="_blank" rel="noopener noreferrer">
                CanvasMAR：通过画布改进掩码自回归视频生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            CanvasMAR: Improving Masked Autoregressive Video Generation With Canvas
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zian Li, Muhan Zhang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于视频生成技术，属于纯粹的视觉生成领域，与推荐系统、搜索或广告的核心技术没有直接关联。尽管涉及自回归模型，但该技术主要面向视频内容生成，没有明确的推荐、搜索或广告应用场景。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:29:09">2025-10-15 15:29:09</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13669v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13669v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.LG</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Masked autoregressive models (MAR) have recently emerged as a powerful paradigm for image and video generation, combining the flexibility of masked modeling with the potential of continuous tokenizer. However, video MAR models suffer from two major limitations: the slow-start problem, caused by the lack of a structured global prior at early sampling stages, and error accumulation across the autoregression in both spatial and temporal dimensions. In this work, we propose CanvasMAR, a novel video MAR model that mitigates these issues by introducing a canvas mechanism--a blurred, global prediction of the next frame, used as the starting point for masked generation. The canvas provides global structure early in sampling, enabling faster and more coherent frame synthesis. Furthermore, we introduce compositional classifier-free guidance that jointly enlarges spatial (canvas) and temporal conditioning, and employ noise-based canvas augmentation to enhance robustness. Experiments on the BAIR and Kinetics-600 benchmarks demonstrate that CanvasMAR produces high-quality videos with fewer autoregressive steps. Our approach achieves remarkable performance among autoregressive models on Kinetics-600 dataset and rivals diffusion-based methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13660v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13660v1" target="_blank" rel="noopener noreferrer">
                OmniGaze：基于奖励启发的野外可泛化视线估计
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            OmniGaze: Reward-inspired Generalizable Gaze Estimation In The Wild
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hongyu Qu, Jianan Wei, Xiangbo Shu, Yazhou Yao, Wenguan Wang, Jinhui Tang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的视线估计技术，属于纯粹的视觉研究方向。虽然标题提到'奖励启发'，但这与推荐系统、搜索或广告中的奖励机制无关，而是视觉任务中的技术方法。该工作没有展示与异构数据建模、Transformer架构改进或LLM技术应用的明显关联，因此与当前关注领域高度不相关。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:19:52">2025-10-15 15:19:52</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13660v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13660v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Current 3D gaze estimation methods struggle to generalize across diverse data domains, primarily due to i) the scarcity of annotated datasets, and ii) the insufficient diversity of labeled data. In this work, we present OmniGaze, a semi-supervised framework for 3D gaze estimation, which utilizes large-scale unlabeled data collected from diverse and unconstrained real-world environments to mitigate domain bias and generalize gaze estimation in the wild. First, we build a diverse collection of unlabeled facial images, varying in facial appearances, background environments, illumination conditions, head poses, and eye occlusions. In order to leverage unlabeled data spanning a broader distribution, OmniGaze adopts a standard pseudo-labeling strategy and devises a reward model to assess the reliability of pseudo labels. Beyond pseudo labels as 3D direction vectors, the reward model also incorporates visual embeddings extracted by an off-the-shelf visual encoder and semantic cues from gaze perspective generated by prompting a Multimodal Large Language Model to compute confidence scores. Then, these scores are utilized to select high-quality pseudo labels and weight them for loss computation. Extensive experiments demonstrate that OmniGaze achieves state-of-the-art performance on five datasets under both in-domain and cross-domain settings. Furthermore, we also evaluate the efficacy of OmniGaze as a scalable data engine for gaze estimation, which exhibits robust zero-shot generalization on four unseen datasets.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card (arXiv 2510.13638v1). English-language fields carry lang="en" because the page is zh-CN; decorative icons and "|" separators are aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13638v1" target="_blank" rel="noopener noreferrer">
                医学图像增强中的挑战、进展与评估指标：系统性文献综述
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Challenges, Advances, and Evaluation Metrics in Medical Image Enhancement: A Systematic Literature Review
        </div>

        <div class="mb-2 text-sm text-gray-600 italic" lang="en">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Chun Wai Chin, Haniza Yazid, Hoi Leong Lee
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像增强领域，属于明确的医学领域特定应用，与搜索、推荐、广告等核心领域完全无关。论文内容涉及医学图像处理技术，属于被明确排除的无关主题范畴。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T15:01:58">2025-10-15 15:01:58</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13638v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13638v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Medical image enhancement is crucial for improving the quality and interpretability of diagnostic images, ultimately supporting early detection, accurate diagnosis, and effective treatment planning. Despite advancements in imaging technologies such as X-ray, CT, MRI, and ultrasound, medical images often suffer from challenges like noise, artifacts, and low contrast, which limit their diagnostic potential. Addressing these challenges requires robust preprocessing, denoising algorithms, and advanced enhancement methods, with deep learning techniques playing an increasingly significant role. This systematic literature review, following the PRISMA approach, investigates the key challenges, recent advancements, and evaluation metrics in medical image enhancement. By analyzing findings from 39 peer-reviewed studies, this review provides insights into the effectiveness of various enhancement methods across different imaging modalities and the importance of evaluation metrics in assessing their impact. Key issues like low contrast and noise are identified as the most frequent, with MRI and multi-modal imaging receiving the most attention, while specialized modalities such as histopathology, endoscopy, and bone scintigraphy remain underexplored. Out of the 39 studies, 29 utilize conventional mathematical methods, 9 focus on deep learning techniques, and 1 explores a hybrid approach. In terms of image quality assessment, 18 studies employ both reference-based and non-reference-based metrics, 9 rely solely on reference-based metrics, and 12 use only non-reference-based metrics, with a total of 65 IQA metrics introduced, predominantly non-reference-based. This review highlights current limitations, research gaps, and potential future directions for advancing medical image enhancement.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13630v1" target="_blank" rel="noopener noreferrer">
                AVAR-Net：一种轻量级音视频异常识别框架及基准数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            AVAR-Net: A Lightweight Audio-Visual Anomaly Recognition Framework with a Benchmark Dataset
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Amjid Ali, Zulfiqar Ahmad Khan, Altaf Hussain, Muhammad Munsif, Adnan Hussain, S...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于音视频异常识别，属于纯粹的视觉和音频处理领域，与推荐系统、搜索或广告的核心技术没有直接关联。虽然提到了轻量级框架，但其应用场景局限于异常检测，无法为RecSys/Search/Ads领域提供可借鉴的技术或方法。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:56:00">2025-10-15 14:56:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13630v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13630v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Anomaly recognition plays a vital role in surveillance, transportation, healthcare, and public safety. However, most existing approaches rely solely on visual data, making them unreliable under challenging conditions such as occlusion, low illumination, and adverse weather. Moreover, the absence of large-scale synchronized audio-visual datasets has hindered progress in multimodal anomaly recognition. To address these limitations, this study presents AVAR-Net, a lightweight and efficient audio-visual anomaly recognition framework designed for real-world environments. AVAR-Net consists of four main modules: an audio feature extractor, a video feature extractor, fusion strategy, and a sequential pattern learning network that models cross-modal relationships for anomaly recognition. Specifically, the Wav2Vec2 model extracts robust temporal features from raw audio, while MobileViT captures both local and global visual representations from video frames. An early fusion mechanism combines these modalities, and a Multi-Stage Temporal Convolutional Network (MTCN) model that learns long-range temporal dependencies within the fused representation, enabling robust spatiotemporal reasoning. A novel Visual-Audio Anomaly Recognition (VAAR) dataset, is also introduced, serving as a medium-scale benchmark containing 3,000 real-world videos with synchronized audio across ten diverse anomaly classes. Experimental evaluations demonstrate that AVAR-Net achieves 89.29% accuracy on VAAR and 88.56% Average Precision on the XD-Violence dataset, improving Average Precision by 2.8% over existing state-of-the-art methods. These results highlight the effectiveness, efficiency, and generalization capability of the proposed framework, as well as the utility of VAAR as a benchmark for advancing multimodal anomaly recognition research.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13562v1" target="_blank" rel="noopener noreferrer">
                一种具有理论保证的高效方法，用于同时重建飞行时间正电子发射断层扫描的活动和衰减正弦图
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            An efficient approach with theoretical guarantees to simultaneously reconstruct activity and attenuation sinogram for TOF-PET
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Liyang Hu, Chong Chen
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于医学成像领域的正电子发射断层扫描（PET）重建技术，属于医学物理和生物医学工程的范畴。该主题与推荐系统、搜索、广告或LLM技术没有任何关联，完全超出了指定的关注范围。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T14:01:03">2025-10-15 14:01:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13562v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13562v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">physics.med-ph</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.NA</span><span class="category-tag">math.NA</span><span class="category-tag">65J15</span><span class="category-tag">65R32</span><span class="category-tag">65J22</span><span class="category-tag">68U10</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In positron emission tomography (PET), it is indispensable to perform attenuation correction in order to obtain the quantitatively accurate activity map (tracer distribution) in the body. Generally, this is carried out based on the estimated attenuation map obtained from computed tomography or magnetic resonance imaging. However, except for errors in the attenuation correction factors obtained, the additional scan not only brings in new radiation doses and/or increases the scanning time but also leads to severe misalignment induced by various motions during and between the two sequential scans. To address these issues, based on maximum likelihood estimation, we propose a new mathematical model for simultaneously reconstructing the activity and attenuation sinogram from the time-of-flight (TOF)-PET emission data only. Particularly, we make full use of the exclusively exponential form for the attenuation correction factors, and consider the constraint of a total amount of the activity in some mask region in the proposed model. Furthermore, we prove its well-posedness, including the existence, uniqueness and stability of the solution. We propose an alternating update algorithm to solve the model, and also analyze its convergence. Finally, numerical experiments with various TOF-PET emission data demonstrate that the proposed method is of numerical convergence and robust to noise, and outperforms some state-of-the-art methods in terms of accuracy and efficiency, and has the capability of autonomous attenuation correction.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13546v1" target="_blank" rel="noopener noreferrer">
                视觉SLAM加速特征检测器：FPGA与GPU对比研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Accelerated Feature Detectors for Visual SLAM: A Comparative Study of FPGA vs GPU
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ruiqi Ye, Mikel Luján
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的SLAM（同时定位与地图构建）技术，主要研究硬件加速器（FPGA vs GPU）在视觉特征检测中的性能比较。这与搜索、推荐或广告系统的核心领域进展、LLM技术或Transformer架构没有直接关联，也不涉及异构数据的统一建模。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T13:40:55">2025-10-15 13:40:55</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13546v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13546v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.ET</span><span class="category-tag">cs.PF</span><span class="category-tag">cs.RO</span><span class="category-tag">C.3; C.4; I.4.6</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. Raw LaTeX multiplication markup from the feed is written as a literal multiplication sign. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Feature detection is a common yet time-consuming module in Simultaneous Localization and Mapping (SLAM) implementations, which are increasingly deployed on power-constrained platforms, such as drones. Graphics Processing Units (GPUs) have been a popular accelerator for computer vision in general, and feature detection and SLAM in particular. On the other hand, System-on-Chips (SoCs) with integrated Field Programmable Gate Array (FPGA) are also widely available. This paper presents the first study of hardware-accelerated feature detectors considering a Visual SLAM (V-SLAM) pipeline. We offer new insights by comparing the best GPU-accelerated FAST, Harris, and SuperPoint implementations against the FPGA-accelerated counterparts on modern SoCs (Nvidia Jetson Orin and AMD Versal). The evaluation shows that when using a non-learning-based feature detector such as FAST and Harris, their GPU implementations, and the GPU-accelerated V-SLAM can achieve better run-time performance and energy efficiency than the FAST and Harris FPGA implementations as well as the FPGA-accelerated V-SLAM. However, when considering a learning-based detector such as SuperPoint, its FPGA implementation can achieve better run-time performance and energy efficiency (up to 3.1× and 1.4× improvements, respectively) than the GPU implementation. The FPGA-accelerated V-SLAM can also achieve comparable run-time performance compared to the GPU-accelerated V-SLAM, with better FPS in 2 out of 5 dataset sequences. When considering the accuracy, the results show that the GPU-accelerated V-SLAM is more accurate than the FPGA-accelerated V-SLAM in general. Last but not least, the use of hardware acceleration for feature detection could further improve the performance of the V-SLAM pipeline by having the global bundle adjustment module invoked less frequently without sacrificing accuracy.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13540v1" target="_blank" rel="noopener noreferrer">
                学习神经参数化3D乳房形状模型用于从单目RGB视频进行度量表面重建
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Learning Neural Parametric 3D Breast Shape Models for Metrical Surface Reconstruction From Monocular RGB Videos
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Maximilian Weiherer, Antonia von Riedheim, Vanessa Brébant, Bernhard Egger, Chri...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D医学图像重建和计算机视觉，涉及乳房形状建模和度量表面重建。这与我的关注点完全无关，因为它是纯粹的医学/生物医学应用，属于明确排除的领域，与推荐系统、搜索或广告没有任何潜在联系。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T13:35:03">2025-10-15 13:35:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13540v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13540v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                We present a neural parametric 3D breast shape model and, based on this model, introduce a low-cost and accessible 3D surface reconstruction pipeline capable of recovering accurate breast geometry from a monocular RGB video. In contrast to widely used, commercially available yet prohibitively expensive 3D breast scanning solutions and existing low-cost alternatives, our method requires neither specialized hardware nor proprietary software and can be used with any device that is able to record RGB videos. The key building blocks of our pipeline are a state-of-the-art, off-the-shelf Structure-from-motion pipeline, paired with a parametric breast model for robust and metrically correct surface reconstruction. Our model, similarly to the recently proposed implicit Regensburg Breast Shape Model (iRBSM), leverages implicit neural representations to model breast shapes. However, unlike the iRBSM, which employs a single global neural signed distance function (SDF), our approach -- inspired by recent state-of-the-art face models -- decomposes the implicit breast domain into multiple smaller regions, each represented by a local neural SDF anchored at anatomical landmark positions. When incorporated into our surface reconstruction pipeline, the proposed model, dubbed liRBSM (short for localized iRBSM), significantly outperforms the iRBSM in terms of reconstruction quality, yielding more detailed surface reconstruction than its global counterpart. Overall, we find that the introduced pipeline is able to recover high-quality 3D breast geometry within an error margin of less than 2 mm. Our method is fast (requires less than six minutes), fully transparent and open-source, and -- together with the model -- publicly available at https://rbsm.re-mic.de/local-implicit.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13493v1" target="_blank" rel="noopener noreferrer">
                ExpressNet-MoE：一种用于情感识别的混合深度神经网络
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            ExpressNet-MoE: A Hybrid Deep Neural Network for Emotion Recognition
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Deeptimaan Banerjee, Prateek Gothwal, Ashis Kumer Biswas
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于情感识别这一特定应用领域，与推荐系统、搜索或广告的核心技术无关。虽然采用了混合专家（MoE）架构，但情感识别属于心理学和情感计算领域，没有明显的潜在应用可以关联到RecSys/Search/Ads的核心技术需求。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T12:42:49">2025-10-15 12:42:49</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13493v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13493v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span><span class="category-tag">I.2.10; I.5.2; H.4.2</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In many domains, including online education, healthcare, security, and human-computer interaction, facial emotion recognition (FER) is essential. Real-world FER is still difficult despite its significance because of some factors such as variable head positions, occlusions, illumination shifts, and demographic diversity. Engagement detection, which is essential for applications like virtual learning and customer services, is frequently challenging due to FER limitations by many current models. In this article, we propose ExpressNet-MoE, a novel hybrid deep learning model that blends both Convolution Neural Networks (CNNs) and Mixture of Experts (MoE) framework, to overcome the difficulties. Our model dynamically chooses the most pertinent expert networks, thus it aids in the generalization and providing flexibility to model across a wide variety of datasets. Our model improves on the accuracy of emotion recognition by utilizing multi-scale feature extraction to collect both global and local facial features. ExpressNet-MoE includes numerous CNN-based feature extractors, a MoE module for adaptive feature selection, and finally a residual network backbone for deep feature learning. To demonstrate efficacy of our proposed model we evaluated on several datasets, and compared with current state-of-the-art methods. Our model achieves accuracies of 74.77% on AffectNet (v7), 72.55% on AffectNet (v8), 84.29% on RAF-DB, and 64.66% on FER-2013. The results show how adaptive our model is and how it may be used to develop end-to-end emotion recognition systems in practical settings. Reproducible codes and results are made publicly accessible at https://github.com/DeeptimaanB/ExpressNet-MoE.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13454v1" target="_blank" rel="noopener noreferrer">
                VIST3A：通过将多视图重建网络与视频生成器拼接实现文本到3D生成
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            VIST3A: Text-to-3D by Stitching a Multi-view Reconstruction Network to a Video Generator
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hyojun Go, Dominik Narnhofer, Goutam Bhat, Prune Truong, Federico Tombari, Konra...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到3D生成和视频生成技术，属于计算机视觉和图形学领域。虽然涉及多模态处理，但其核心是3D内容生成和视频合成，与推荐系统、搜索或广告中的排名、检索、用户建模等核心任务没有直接关联。该技术主要面向AIGC和内容生成应用，属于明确排除的无关主题范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:55:08">2025-10-15 11:55:08</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13454v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13454v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid progress of large, pretrained models for both visual content generation and 3D reconstruction opens up new possibilities for text-to-3D generation. Intuitively, one could obtain a formidable 3D scene generator if one were able to combine the power of a modern latent text-to-video model as "generator" with the geometric abilities of a recent (feedforward) 3D reconstruction system as "decoder". We introduce VIST3A, a general framework that does just that, addressing two main challenges. First, the two components must be joined in a way that preserves the rich knowledge encoded in their weights. We revisit model stitching, i.e., we identify the layer in the 3D decoder that best matches the latent representation produced by the text-to-video generator and stitch the two parts together. That operation requires only a small dataset and no labels. Second, the text-to-video generator must be aligned with the stitched 3D decoder, to ensure that the generated latents are decodable into consistent, perceptually convincing 3D scene geometry. To that end, we adapt direct reward finetuning, a popular technique for human preference alignment. We evaluate the proposed VIST3A approach with different video generators and 3D reconstruction models. All tested pairings markedly improve over prior text-to-3D models that output Gaussian splats. Moreover, by choosing a suitable 3D base model, VIST3A also enables high-quality text-to-pointmap generation.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Card header: Chinese title linking to the alphaXiv page, followed by the score badge. The star glyph is decorative and hidden from assistive technology. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13452v1" target="_blank" rel="noopener noreferrer">
                近红外高光谱成像在食品分析中的应用——算法与方法的改进
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>
    
    <!-- Detail section: English title, authors, recommendation rationale, metadata row and collapsible abstract. The page declares lang="zh-CN", so English-only text is tagged lang="en"; decorative icons and separators are hidden from assistive technology. -->
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Near-Infrared Hyperspectral Imaging Applications in Food Analysis -- Improving Algorithms and Methodologies
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Ole-Christian Galbo Engstrøm
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于食品分析领域的近红外高光谱成像技术，属于食品科学和化学分析领域，与推荐系统、搜索或广告的核心技术完全无关。论文内容涉及特定领域的传感器技术和分析方法，没有任何与LLM、Transformer架构或推荐系统相关的技术元素。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link and category tags. The datetime value mirrors the visible text; no UTC offset is added because the text carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:53:01">2025-10-15 11:53:01</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13452v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13452v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract. The LaTeX-escaped percent sign from the feed is written as a plain percent sign. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This thesis investigates the application of near-infrared hyperspectral imaging (NIR-HSI) for food quality analysis. The investigation is conducted through four studies operating with five research hypotheses. For several analyses, the studies compare models based on convolutional neural networks (CNNs) and partial least squares (PLS). Generally, joint spatio-spectral analysis with CNNs outperforms spatial analysis with CNNs and spectral analysis with PLS when modeling parameters where chemical and physical visual information are relevant. When modeling chemical parameters with a 2-dimensional (2D) CNN, augmenting the CNN with an initial layer dedicated to performing spectral convolution enhances its predictive performance by learning a spectral preprocessing similar to that applied by domain experts. Still, PLS-based spectral modeling performs equally well for analysis of the mean content of chemical parameters in samples and is the recommended approach. Modeling the spatial distribution of chemical parameters with NIR-HSI is limited by the ability to obtain spatially resolved reference values. Therefore, a study used bulk mean references for chemical map generation of fat content in pork bellies. A PLS-based approach gave non-smooth chemical maps and pixel-wise predictions outside the range of 0-100%. Conversely, a 2D CNN augmented with a spectral convolution layer mitigated all issues arising with PLS. The final study attempted to model barley's germinative capacity by analyzing NIR spectra, RGB images, and NIR-HSI images. However, the results were inconclusive due to the dataset's low degree of germination. Additionally, this thesis has led to the development of two open-sourced Python packages. The first facilitates fast PLS-based modeling, while the second facilitates very fast cross-validation of PLS and other classical machine learning models with a new algorithm.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13441v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13441v1" target="_blank" rel="noopener noreferrer">
                用于PET图像重建领域自适应的可操控条件扩散模型
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Steerable Conditional Diffusion for Domain Adaptation in PET Image Reconstruction
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>George Webber, Alexander Hammers, Andrew P. King, Andrew J. Reader
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像（PET图像重建）领域，属于明确的无关主题范畴。虽然涉及扩散模型技术，但其应用场景与搜索、推荐、广告系统完全无关，且没有展示任何在推荐系统领域的潜在应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:40:03">2025-10-15 11:40:03</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13441v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13441v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">physics.med-ph</span><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Diffusion models have recently enabled state-of-the-art reconstruction of positron emission tomography (PET) images while requiring only image training data. However, domain shift remains a key concern for clinical adoption: priors trained on images from one anatomy, acquisition protocol or pathology may produce artefacts on out-of-distribution data. We propose integrating steerable conditional diffusion (SCD) with our previously-introduced likelihood-scheduled diffusion (PET-LiSch) framework to improve the alignment of the diffusion model's prior to the target subject. At reconstruction time, for each diffusion step, we use low-rank adaptation (LoRA) to align the diffusion model prior with the target domain on the fly. Experiments on realistic synthetic 2D brain phantoms demonstrate that our approach suppresses hallucinated artefacts under domain shift, i.e. when our diffusion model is trained on perturbed images and tested on normal anatomy, our approach suppresses the hallucinated structure, outperforming both OSEM and diffusion model baselines qualitatively and quantitatively. These results provide a proof-of-concept that steerable priors can mitigate domain shift in diffusion-based PET reconstruction and motivate future evaluation on real data.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13433v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13433v1" target="_blank" rel="noopener noreferrer">
                超越像素：一种用于探测3D神经元选择性的可微分流程
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Beyond Pixels: A Differentiable Pipeline for Probing Neuronal Selectivity in 3D
        </div>

        <!-- Author list arrives already truncated ("...") from the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Pavithra Elumalai, Mohammad Bashiri, Goirik Chakrabarty, Suhas Shrinivasan, Fabi...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于3D神经科学和神经元选择性分析，属于生物医学领域的神经科学研究。标题中提到的3D、神经元选择性等概念与推荐系统、搜索或广告的技术焦点完全无关，也没有任何潜在的Transformer架构或LLM应用前景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:29:21">2025-10-15 11:29:21</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13433v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13433v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Visual perception relies on inference of 3D scene properties such as shape, pose, and lighting. To understand how visual sensory neurons enable robust perception, it is crucial to characterize their selectivity to such physically interpretable factors. However, current approaches mainly operate on 2D pixels, making it difficult to isolate selectivity for physical scene properties. To address this limitation, we introduce a differentiable rendering pipeline that optimizes deformable meshes to obtain MEIs directly in 3D. The method parameterizes mesh deformations with radial basis functions and learns offsets and scales that maximize neuronal responses while enforcing geometric regularity. Applied to models of monkey area V4, our approach enables probing neuronal selectivity to interpretable 3D factors such as pose and lighting. This approach bridges inverse graphics with systems neuroscience, offering a way to probe neural selectivity with physically grounded, 3D stimuli beyond conventional pixel-based methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13419v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13419v1" target="_blank" rel="noopener noreferrer">
                基于补丁内容一致性适配器的超高分辨率图像修复
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Ultra High-Resolution Image Inpainting with Patch-Based Content Consistency Adapter
        </div>

        <!-- Author list arrives already truncated ("...") from the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jianhui Zhang, Sheng Cheng, Qirui Sun, Jia Liu, Wang Luyang, Chaoyu Feng, Chen F...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的图像修复技术，与推荐系统、搜索或广告的核心技术无关。虽然图像修复在某些边缘场景可能有应用（如广告素材处理），但这属于明确的无关主题范畴，且论文未提及任何与LLM、Transformer架构或推荐系统相关的技术。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:18:24">2025-10-15 11:18:24</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13419v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13419v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this work, we present Patch-Adapter, an effective framework for high-resolution text-guided image inpainting. Unlike existing methods limited to lower resolutions, our approach achieves 4K+ resolution while maintaining precise content consistency and prompt alignment, two critical challenges in image inpainting that intensify with increasing resolution and texture complexity. Patch-Adapter leverages a two-stage adapter architecture to scale the diffusion model's resolution from 1K to 4K+ without requiring structural overhauls: (1) Dual Context Adapter learns coherence between masked and unmasked regions at reduced resolutions to establish global structural consistency; and (2) Reference Patch Adapter implements a patch-level attention mechanism for full-resolution inpainting, preserving local detail fidelity through adaptive feature fusion. This dual-stage architecture uniquely addresses the scalability gap in high-resolution inpainting by decoupling global semantics from localized refinement. Experiments demonstrate that Patch-Adapter not only resolves artifacts common in large-scale inpainting but also achieves state-of-the-art performance on the OpenImages and Photo-Concept-Bucket datasets, outperforming existing methods in both perceptual quality and text-prompt adherence.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13418v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13418v1" target="_blank" rel="noopener noreferrer">
                强化学习遇见掩码生成模型：用于文本到图像生成的Mask-GRPO
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Reinforcement Learning Meets Masked Generative Models: Mask-GRPO for Text-to-Image Generation
        </div>

        <!-- Author list arrives already truncated ("...") from the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yifu Luo, Xinhao Hu, Keyu Fan, Haoyuan Sun, Zeyu Chen, Bo Xia, Tiantian Zhang, Y...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于文本到图像生成，这属于纯粹的AIGC和内容生成领域，与我的关注点无关。虽然涉及强化学习，但应用于图像生成任务，与推荐系统、搜索或广告没有明确关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T11:18:12">2025-10-15 11:18:12</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13418v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13418v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Reinforcement learning (RL) has garnered increasing attention in text-to-image (T2I) generation. However, most existing RL approaches are tailored to either diffusion models or autoregressive models, overlooking an important alternative: masked generative models. In this work, we propose Mask-GRPO, the first method to incorporate Group Relative Policy Optimization (GRPO)-based RL into this overlooked paradigm. Our core insight is to redefine the transition probability, which is different from current approaches, and formulate the unmasking process as a multi-step decision-making problem. To further enhance our method, we explore several useful strategies, including removing the KL constraint, applying the reduction strategy, and filtering out low-quality samples. Using Mask-GRPO, we improve a base model, Show-o, with substantial improvements on standard T2I benchmarks and preference alignment, outperforming existing state-of-the-art approaches. The code is available on https://github.com/xingzhejun/Mask-GRPO
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13381v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13381v1" target="_blank" rel="noopener noreferrer">
                利用2D先验和SDF引导进行动态城市场景渲染
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Leveraging 2D Priors and SDF Guidance for Dynamic Urban Scene Rendering
        </div>

        <!-- Author list arrives already truncated ("...") from the generator. -->
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Siddharth Tourani, Jayaram Reddy, Akash Kumbar, Satyajit Tourani, Nishant Goyal,...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的动态场景渲染技术，涉及2D先验和符号距离函数(SDF)引导。虽然标题提到城市场景，但这属于纯粹的视觉渲染领域，与推荐系统、搜索或广告的核心技术没有直接关联。该研究缺乏在RecSys/Search/Ads领域的潜在应用前景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T10:21:36">2025-10-15 10:21:36</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13381v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13381v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Dynamic scene rendering and reconstruction play a crucial role in computer vision and augmented reality. Recent methods based on 3D Gaussian Splatting (3DGS), have enabled accurate modeling of dynamic urban scenes, but for urban scenes they require both camera and LiDAR data, ground-truth 3D segmentations and motion data in the form of tracklets or pre-defined object templates such as SMPL. In this work, we explore whether a combination of 2D object agnostic priors in the form of depth and point tracking coupled with a signed distance function (SDF) representation for dynamic objects can be used to relax some of these requirements. We present a novel approach that integrates Signed Distance Functions (SDFs) with 3D Gaussian Splatting (3DGS) to create a more robust object representation by harnessing the strengths of both methods. Our unified optimization framework enhances the geometric accuracy of 3D Gaussian splatting and improves deformation modeling within the SDF, resulting in a more adaptable and precise representation. We demonstrate that our method achieves state-of-the-art performance in rendering metrics even without LiDAR data on urban scenes. When incorporating LiDAR, our approach improved further in reconstructing and generating novel views across diverse object categories, without ground-truth 3D motion annotation. Additionally, our method enables various scene editing tasks, including scene decomposition, and scene composition.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13349v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13349v1" target="_blank" rel="noopener noreferrer">
                无参考渲染视频质量评估：数据集与指标
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            No-Reference Rendered Video Quality Assessment: Dataset and Metrics
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Sipeng Yang, Jiayu Ji, Qingchuan Zhu, Zhiyao Yang, Xiaogang Jin
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频质量评估的特定计算机视觉任务，与推荐系统、搜索或广告的核心领域进展无关。虽然视频内容可能出现在某些推荐场景中，但该研究本身不涉及LLM技术、Transformer架构改进，也没有展示在推荐/搜索/广告中的直接应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:36:52">2025-10-15 09:36:52</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13349v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13349v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Quality assessment of videos is crucial for many computer graphics applications, including video games, virtual reality, and augmented reality, where visual performance has a significant impact on user experience. When test videos cannot be perfectly aligned with references or when references are unavailable, the significance of no-reference video quality assessment (NR-VQA) methods is undeniable. However, existing NR-VQA datasets and metrics are primarily focused on camera-captured videos; applying them directly to rendered videos would result in biased predictions, as rendered videos are more prone to temporal artifacts. To address this, we present a large rendering-oriented video dataset with subjective quality annotations, as well as a designed NR-VQA metric specific to rendered videos. The proposed dataset includes a wide range of 3D scenes and rendering settings, with quality scores annotated for various display types to better reflect real-world application scenarios. Building on this dataset, we calibrate our NR-VQA metric to assess rendered video quality by looking at both image quality and temporal stability. We compare our metric to existing NR-VQA metrics, demonstrating its superior performance on rendered videos. Finally, we demonstrate that our metric can be used to benchmark supersampling methods and assess frame generation strategies in real-time rendering.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13326v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13326v1" target="_blank" rel="noopener noreferrer">
                DEF-YOLO：利用YOLO进行热成像中的隐蔽武器检测
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. The final word was truncated to "Imagin" in the generated output and is restored here. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            DEF-YOLO: Leveraging YOLO for Concealed Weapon Detection in Thermal Imaging
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Divya Bhardwaj, Arnav Ramamoorthy, Poonam Goyal
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于热成像中的武器检测，属于纯粹的计算机视觉应用，与推荐系统、搜索或广告领域没有直接关联。虽然YOLO是目标检测模型，但该应用场景（隐蔽武器检测）属于安防领域，无法应用于RecSys/Search/Ads中的任何核心问题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:13:35">2025-10-15 09:13:35</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13326v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13326v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Concealed weapon detection aims at detecting weapons hidden beneath a person's clothing or luggage. Various imaging modalities like Millimeter Wave, Microwave, Terahertz, Infrared, etc., are exploited for the concealed weapon detection task. These imaging modalities have their own limitations, such as poor resolution in microwave imaging, privacy concerns in millimeter wave imaging, etc. To provide a real-time, 24 x 7 surveillance, low-cost, and privacy-preserved solution, we opted for thermal imaging in spite of the lack of availability of a benchmark dataset. We propose a novel approach and a dataset for concealed weapon detection in thermal imagery. Our YOLO-based architecture, DEF-YOLO, is built with key enhancements in YOLOv8 tailored to the unique challenges of concealed weapon detection in thermal vision. We adopt deformable convolutions at the SPPF layer to exploit multi-scale features; backbone and neck layers to extract low, mid, and high-level features, enabling DEF-YOLO to adaptively focus on localization around the objects in thermal homogeneous regions, without sacrificing much of the speed and throughput. In addition to these simple yet effective key architectural changes, we introduce a new, large-scale Thermal Imaging Concealed Weapon dataset, TICW, featuring a diverse set of concealed weapons and capturing a wide range of scenarios. To the best of our knowledge, this is the first large-scale contributed dataset for this task. We also incorporate focal loss to address the significant class imbalance inherent in the concealed weapon detection task. The efficacy of the proposed work establishes a new benchmark through extensive experimentation for concealed weapon detection in thermal imagery.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Paper card for arXiv:2510.13317v1. The collapsed-level-2 class keeps the whole card hidden (display: none !important in the page's inline styles). -->
    <!-- Header row: translated title linking to alphaXiv, plus the relevance score badge. Icons are decorative, hence aria-hidden. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13317v1" target="_blank" rel="noopener noreferrer">
                从光流估计器中移除代价体
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <!-- Original English title; lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Removing Cost Volumes from Optical Flow Estimators
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Simon Kiefhaber, Stefan Roth, Simone Schaub-Meyer
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">这篇论文专注于计算机视觉中的光流估计技术，属于纯粹的视觉处理领域。虽然光流在视频理解中有应用，但该论文的核心创新（移除代价体）是视觉算法优化，与推荐系统、搜索或广告中的排序、检索、用户建模等核心问题没有直接关联，也没有明显的跨模态应用潜力。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. No timezone is given in the source data, so datetime carries none. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T09:07:09">2025-10-15 09:07:09</time>
            <span class="mx-2">|</span>
            <a href="https://arxiv.org/abs/2510.13317v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13317v1
            </a>
            <span class="mx-2">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">I.4.8</span></div>
        </div>

        <!-- Native disclosure widget holding the full English abstract (a sibling of the metadata row, not a child). -->
        <!-- The abstract's LaTeX math markup is written out as plain Unicode text so it reads correctly without a math renderer. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Cost volumes are used in every modern optical flow estimator, but due to their computational and space complexity, they are often a limiting factor regarding both processing speed and the resolution of input frames. Motivated by our empirical observation that cost volumes lose their importance once all other network parts of, e.g., a RAFT-based pipeline have been sufficiently trained, we introduce a training strategy that allows removing the cost volume from optical flow estimators throughout training. This leads to significantly improved inference speed and reduced memory requirements. Using our training strategy, we create three different models covering different compute budgets. Our most accurate model reaches state-of-the-art accuracy while being 1.2× faster and having a 6× lower memory footprint than comparable models; our fastest model is capable of processing Full HD frames at 20 FPS using only 500 MB of GPU memory.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13310v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13310v1" target="_blank" rel="noopener noreferrer">
                InstantSfM：完全稀疏且并行的运动恢复结构方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            InstantSfM: Fully Sparse and Parallel Structure-from-Motion
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jiankun Zhong, Zitong Zhan, Quankai Gao, Ziyu Chen, Haozhe Lou, Jiageng Mao, Ulr...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉中的运动恢复结构（SfM）技术，这是一个纯粹的3D视觉和重建问题。虽然提到了稀疏性和并行化等效率改进，但这些优化特定于视觉重建流程，与推荐系统、搜索或广告中的排序、检索或用户建模没有直接关联。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T08:58:05">2025-10-15 08:58:05</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13310v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13310v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Structure-from-Motion (SfM), a method that recovers camera poses and scene geometry from uncalibrated images, is a central component in robotic reconstruction and simulation. Despite the state-of-the-art performance of traditional SfM methods such as COLMAP and its follow-up work, GLOMAP, naive CPU-specialized implementations of bundle adjustment (BA) or global positioning (GP) introduce significant computational overhead when handling large-scale scenarios, leading to a trade-off between accuracy and speed in SfM. Moreover, the blessing of efficient C++-based implementations in COLMAP and GLOMAP comes with the curse of limited flexibility, as they lack support for various external optimization options. On the other hand, while deep learning based SfM pipelines like VGGSfM and VGGT enable feed-forward 3D reconstruction, they are unable to scale to thousands of input views at once as GPU memory consumption increases sharply as the number of input views grows. In this paper, we unleash the full potential of GPU parallel computation to accelerate each critical stage of the standard SfM pipeline. Building upon recent advances in sparse-aware bundle adjustment optimization, our design extends these techniques to accelerate both BA and GP within a unified global SfM framework. Through extensive experiments on datasets of varying scales (e.g. 5000 images where VGGSfM and VGGT run out of memory), our method demonstrates up to about 40 times speedup over COLMAP while achieving consistently comparable or even improved reconstruction accuracy. Our project page can be found at https://cre185.github.io/InstantSfM/.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13307v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13307v1" target="_blank" rel="noopener noreferrer">
                基于因果表示与推理联合学习的点云分割新类发现
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Novel Class Discovery for Point Cloud Segmentation via Joint Learning of Causal Representation and Reasoning
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yang Li, Aming Wu, Zihao Zhang, Yahong Han
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于点云分割和计算机视觉领域的新类发现问题，与推荐系统、搜索或广告的核心技术无直接关联。论文涉及的因果表示学习虽然具有理论价值，但在当前标题描述中缺乏明确的推荐/搜索/广告应用场景。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T08:54:41">2025-10-15 08:54:41</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13307v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13307v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                In this paper, we focus on Novel Class Discovery for Point Cloud Segmentation (3D-NCD), aiming to learn a model that can segment unlabeled (novel) 3D classes using only the supervision from labeled (base) 3D classes. The key to this task is to setup the exact correlations between the point representations and their base class labels, as well as the representation correlations between the points from base and novel classes. A coarse or statistical correlation learning may lead to the confusion in novel class inference. lf we impose a causal relationship as a strong correlated constraint upon the learning process, the essential point cloud representations that accurately correspond to the classes should be uncovered. To this end, we introduce a structural causal model (SCM) to re-formalize the 3D-NCD problem and propose a new method, i.e., Joint Learning of Causal Representation and Reasoning. Specifically, we first analyze hidden confounders in the base class representations and the causal relationships between the base and novel classes through SCM. We devise a causal representation prototype that eliminates confounders to capture the causal representations of base classes. A graph structure is then used to model the causal relationships between the base classes' causal representation prototypes and the novel class prototypes, enabling causal reasoning from base to novel classes. Extensive experiments and visualization results on 3D and 2D NCD semantic segmentation demonstrate the superiorities of our method.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13250v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13250v1" target="_blank" rel="noopener noreferrer">
                面向嵌入式系统的轻量级架构实时人群计数
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Real-Time Crowd Counting for Embedded Systems with Lightweight Architecture
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhiyuan Zhao, Yubin Wen, Siyu Yang, Lichen Ning, Yuandong Liu, Junyu Gao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的人群计数技术，属于纯粹的视觉应用，与推荐系统、搜索或广告的核心技术栈没有直接关联。虽然人群计数在某些场景下可能有辅助作用（如实体店客流分析），但这不属于当前关注的核心领域进展或使能技术范畴。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T07:58:46">2025-10-15 07:58:46</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13250v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13250v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Crowd counting is a task of estimating the number of the crowd through images, which is extremely valuable in the fields of intelligent security, urban planning, public safety management, and so on. However, the existing counting methods have some problems in practical application on embedded systems for these fields, such as excessive model parameters, abundant complex calculations, etc. The practical application of embedded systems requires the model to be real-time, which means that the model is fast enough. Considering the aforementioned problems, we design a super real-time model with a stem-encoder-decoder structure for crowd counting tasks, which achieves the fastest inference compared with state-of-the-arts. Firstly, large convolution kernels in the stem network are used to enlarge the receptive field, which effectively extracts detailed head information. Then, in the encoder part, we use conditional channel weighting and multi-branch local fusion block to merge multi-scale features with low computational consumption. This part is crucial to the super real-time performance of the model. Finally, the feature pyramid networks are added to the top of the encoder to alleviate its incomplete fusion problems. Experiments on three benchmarks show that our network is suitable for super real-time crowd counting on embedded systems, ensuring competitive accuracy. At the same time, the proposed network reasoning speed is the fastest. Specifically, the proposed network achieves 381.7 FPS on NVIDIA GTX 1080Ti and 71.9 FPS on NVIDIA Jetson TX1.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13245v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13245v1" target="_blank" rel="noopener noreferrer">
                CymbaDiff：基于草图的3D语义城市场景生成的结构化空间扩散方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            CymbaDiff: Structured Spatial Diffusion for Sketch-based 3D Semantic Urban Scene Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Li Liang, Bo Miao, Xinyu Wang, Naveed Akhtar, Jordan Vice, Ajmal Mian
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于基于草图的3D城市场景生成，属于计算机图形学和3D视觉领域。虽然涉及扩散模型技术，但其应用场景（3D城市场景生成）与推荐系统、搜索或广告的核心技术栈没有直接关联，也不涉及处理用户行为序列、上下文特征或排序优化等RecSys/Search/Ads的关键问题。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T07:47:00">2025-10-15 07:47:00</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13245v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13245v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Outdoor 3D semantic scene generation produces realistic and semantically rich environments for applications such as urban simulation and autonomous driving. However, advances in this direction are constrained by the absence of publicly available, well-annotated datasets. We introduce SketchSem3D, the first large-scale benchmark for generating 3D outdoor semantic scenes from abstract freehand sketches and pseudo-labeled annotations of satellite images. SketchSem3D includes two subsets, Sketch-based SemanticKITTI and Sketch-based KITTI-360 (containing LiDAR voxels along with their corresponding sketches and annotated satellite images), to enable standardized, rigorous, and diverse evaluations. We also propose Cylinder Mamba Diffusion (CymbaDiff) that significantly enhances spatial coherence in outdoor 3D scene generation. CymbaDiff imposes structured spatial ordering, explicitly captures cylindrical continuity and vertical hierarchy, and preserves both physical neighborhood relationships and global context within the generated scenes. Extensive experiments on SketchSem3D demonstrate that CymbaDiff achieves superior semantic consistency, spatial realism, and cross-dataset generalization. The code and dataset will be available at https://github.com/Lillian-research-hub/CymbaDiff
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13243v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13243v1" target="_blank" rel="noopener noreferrer">
                FlyAwareV2：面向城市场景理解的多模态跨域无人机数据集
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            FlyAwareV2: A Multimodal Cross-Domain UAV Dataset for Urban Scene Understanding
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Francesco Barbato, Matteo Caligiuri, Pietro Zanuttigh
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文聚焦于无人机数据集和城市场景理解，属于纯粹的计算机视觉领域，与推荐系统、搜索或广告的核心技术无直接关联。虽然涉及多模态数据，但主要应用于无人机和城市环境感知，无法为RecSys/Search/Ads领域提供可迁移的技术或应用价值。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T07:44:31">2025-10-15 07:44:31</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13243v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13243v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The development of computer vision algorithms for Unmanned Aerial Vehicle (UAV) applications in urban environments heavily relies on the availability of large-scale datasets with accurate annotations. However, collecting and annotating real-world UAV data is extremely challenging and costly. To address this limitation, we present FlyAwareV2, a novel multimodal dataset encompassing both real and synthetic UAV imagery tailored for urban scene understanding tasks. Building upon the recently introduced SynDrone and FlyAware datasets, FlyAwareV2 introduces several new key contributions: 1) Multimodal data (RGB, depth, semantic labels) across diverse environmental conditions including varying weather and daytime; 2) Depth maps for real samples computed via state-of-the-art monocular depth estimation; 3) Benchmarks for RGB and multimodal semantic segmentation on standard architectures; 4) Studies on synthetic-to-real domain adaptation to assess the generalization capabilities of models trained on the synthetic data. With its rich set of annotations and environmental diversity, FlyAwareV2 provides a valuable resource for research on UAV-based 3D urban scene understanding.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13235v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13235v1" target="_blank" rel="noopener noreferrer">
                EPIPTrack：基于显式和隐式提示重新思考多目标跟踪中的提示建模
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            EPIPTrack: Rethinking Prompt Modeling with Explicit and Implicit Prompts for Multi-Object Tracking
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yukuan Zhang, Jiarui Zhao, Shangqing Nie, Jin Kuang, Shengsheng Wang
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于计算机视觉领域的多目标跟踪任务，属于纯粹的视觉跟踪技术。虽然提到了提示建模概念，但这是针对视觉跟踪的具体应用，与推荐系统、搜索或广告的核心技术领域没有直接关联。论文内容不涉及任何推荐、搜索或广告相关的应用场景或技术迁移可能性。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T07:39:30">2025-10-15 07:39:30</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13235v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13235v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Multimodal semantic cues, such as textual descriptions, have shown strong potential in enhancing target perception for tracking. However, existing methods rely on static textual descriptions from large language models, which lack adaptability to real-time target state changes and prone to hallucinations. To address these challenges, we propose a unified multimodal vision-language tracking framework, named EPIPTrack, which leverages explicit and implicit prompts for dynamic target modeling and semantic alignment. Specifically, explicit prompts transform spatial motion information into natural language descriptions to provide spatiotemporal guidance. Implicit prompts combine pseudo-words with learnable descriptors to construct individualized knowledge representations capturing appearance attributes. Both prompts undergo dynamic adjustment via the CLIP text encoder to respond to changes in target state. Furthermore, we design a Discriminative Feature Augmentor to enhance visual and cross-modal representations. Extensive experiments on MOT17, MOT20, and DanceTrack demonstrate that EPIPTrack outperforms existing trackers in diverse scenarios, exhibiting robust adaptability and superior performance.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13226v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13226v1" target="_blank" rel="noopener noreferrer">
                以样本为中心的多任务学习用于工业表面缺陷检测与分割
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            Sample-Centric Multi-Task Learning for Detection and Segmentation of Industrial Surface Defects
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Hang-Cheng Dong, Yibo Jiao, Fupeng Wei, Guodong Liu, Dong Ye, Bingguo Liu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于工业表面缺陷检测的计算机视觉应用，属于纯粹的视觉检测任务。虽然涉及多任务学习技术，但其应用场景（工业缺陷检测）与推荐系统、搜索或广告领域没有直接关联，也不涉及LLM技术或Transformer架构的进展。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T07:24:26">2025-10-15 07:24:26</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13226v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13226v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Industrial surface defect inspection for sample-wise quality control (QC) must simultaneously decide whether a given sample contains defects and localize those defects spatially. In real production lines, extreme foreground-background imbalance, defect sparsity with a long-tailed scale distribution, and low contrast are common. As a result, pixel-centric training and evaluation are easily dominated by large homogeneous regions, making it difficult to drive models to attend to small or low-contrast defects-one of the main bottlenecks for deployment. Empirically, existing models achieve strong pixel-overlap metrics (e.g., mIoU) but exhibit insufficient stability at the sample level, especially for sparse or slender defects. The root cause is a mismatch between the optimization objective and the granularity of QC decisions. To address this, we propose a sample-centric multi-task learning framework and evaluation suite. Built on a shared-encoder architecture, the method jointly learns sample-level defect classification and pixel-level mask localization. Sample-level supervision modulates the feature distribution and, at the gradient level, continually boosts recall for small and low-contrast defects, while the segmentation branch preserves boundary and shape details to enhance per-sample decision stability and reduce misses. For evaluation, we propose decision-linked metrics, Seg_mIoU and Seg_Recall, which remove the bias of classical mIoU caused by empty or true-negative samples and tightly couple localization quality with sample-level decisions. Experiments on two benchmark datasets demonstrate that our approach substantially improves the reliability of sample-level decisions and the completeness of defect localization.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13208v1. The page's inline stylesheet sets
     .collapsed-level-2 to "display: none !important", so this card is hidden
     while that class is present. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <!-- Header row: translated title (links to alphaXiv) and the score badge. -->
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13208v1" target="_blank" rel="noopener noreferrer">
                MimicParts：面向语音驱动3D运动生成的部分感知风格注入
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <!-- Icons in this card are decorative, so they are hidden from assistive technology. -->
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <!-- Detail section; the inline stylesheet hides .paper-details inside a .collapsed-level-1 ancestor. -->
    <div class="paper-details">
        <!-- Original title: marked lang="en" because the document language is zh-CN. -->
        <div class="mb-2 text-base text-gray-700" lang="en">
            MimicParts: Part-aware Style Injection for Speech-Driven 3D Motion Generation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lianlian Liu, YongKang He, Zhaojie Chu, Xiaofen Xing, Xiangmin Xu
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于语音驱动的3D运动生成，属于计算机图形学和语音处理交叉领域。虽然涉及风格注入技术，但主要应用于3D动画生成，与推荐系统、搜索或广告的核心技术栈无直接关联。论文内容不符合当前关注的任何技术方向，包括核心推荐系统进展、LLM使能技术或Transformer架构改进。</p>
        </div>

        <!-- Metadata row: timestamp, arXiv link, category tags. -->
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <!-- The source gives no time zone, so datetime is a local date-time value. -->
            <time datetime="2025-10-15T06:53:15">2025-10-15 06:53:15</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13208v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13208v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Generating stylized 3D human motion from speech signals presents substantial challenges, primarily due to the intricate and fine-grained relationships among speech signals, individual styles, and the corresponding body movements. Current style encoding approaches either oversimplify stylistic diversity or ignore regional motion style differences (e.g., upper vs. lower body), limiting motion realism. Additionally, motion style should dynamically adapt to changes in speech rhythm and emotion, but existing methods often overlook this. To address these issues, we propose MimicParts, a novel framework designed to enhance stylized motion generation based on part-aware style injection and part-aware denoising network. It divides the body into different regions to encode localized motion styles, enabling the model to capture fine-grained regional differences. Furthermore, our part-aware attention block allows rhythm and emotion cues to guide each body region precisely, ensuring that the generated motion aligns with variations in speech rhythm and emotional state. Experimental results show that our method outperforming existing methods showcasing naturalness and expressive 3D human motion sequences.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13201v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13201v1" target="_blank" rel="noopener noreferrer">
                论文副驾驶：追踪AI会议中同行评审的演变
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Paper Copilot: Tracking the Evolution of Peer Review in AI Conferences
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Jing Yang, Qiyao Wei, Jiaxin Pei
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注AI会议同行评审系统的演变追踪，这属于学术流程和评估机制的研究。论文内容与推荐系统、搜索、广告的核心技术进展、LLM使能技术或Transformer架构改进完全无关，也不涉及异构数据的统一建模。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:41:06">2025-10-15 06:41:06</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13201v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13201v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.AI</span><span class="category-tag">cs.DL</span><span class="category-tag">cs.LG</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                The rapid growth of AI conferences is straining an already fragile peer-review system, leading to heavy reviewer workloads, expertise mismatches, inconsistent evaluation standards, superficial or templated reviews, and limited accountability under compressed timelines. In response, conference organizers have introduced new policies and interventions to preserve review standards. Yet these ad-hoc changes often create further concerns and confusion about the review process, leaving how papers are ultimately accepted - and how practices evolve across years - largely opaque. We present Paper Copilot, a system that creates durable digital archives of peer reviews across a wide range of computer-science venues, an open dataset that enables researchers to study peer review at scale, and a large-scale empirical analysis of ICLR reviews spanning multiple years. By releasing both the infrastructure and the dataset, Paper Copilot supports reproducible research on the evolution of peer review. We hope these resources help the community track changes, diagnose failure modes, and inform evidence-based improvements toward a more robust, transparent, and reliable peer-review system.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13186v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13186v1" target="_blank" rel="noopener noreferrer">
                STT-GS：具有联合客户端选择与功率控制的采样后传输边缘高斯泼溅
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            STT-GS: Sample-Then-Transmit Edge Gaussian Splatting with Joint Client Selection and Power Control
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zhen Li, Xibin Jin, Guoliang Li, Shuai Wang, Miaowen Wen, Huseyin Arslan, Derric...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文关注边缘计算中的通信优化（客户端选择和功率控制）与计算机视觉渲染技术（高斯泼溅），属于分布式系统优化和计算机图形学领域。这些技术与推荐系统、搜索或广告的核心进展没有直接关联，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T06:20:47">2025-10-15 06:20:47</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13186v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13186v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Edge Gaussian splatting (EGS), which aggregates data from distributed clients and trains a global GS model at the edge server, is an emerging paradigm for scene reconstruction. Unlike traditional edge resource management methods that emphasize communication throughput or general-purpose learning performance, EGS explicitly aims to maximize the GS qualities, rendering existing approaches inapplicable. To address this problem, this paper formulates a novel GS-oriented objective function that distinguishes the heterogeneous view contributions of different clients. However, evaluating this function in turn requires clients' images, leading to a causality dilemma. To this end, this paper further proposes a sample-then-transmit EGS (or STT-GS for short) strategy, which first samples a subset of images as pilot data from each client for loss prediction. Based on the first-stage evaluation, communication resources are then prioritized towards more valuable clients. To achieve efficient sampling, a feature-domain clustering (FDC) scheme is proposed to select the most representative data and pilot transmission time minimization (PTTM) is adopted to reduce the pilot overhead.Subsequently, we develop a joint client selection and power control (JCSPC) framework to maximize the GS-oriented function under communication resource constraints. Despite the nonconvexity of the problem, we propose a low-complexity efficient solution based on the penalty alternating majorization minimization (PAMM) algorithm. Experiments unveil that the proposed scheme significantly outperforms existing benchmarks on real-world datasets. It is found that the GS-oriented objective can be accurately predicted with low sampling ratios (e.g.,10%), and our method achieves an excellent tradeoff between view contributions and communication costs.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13160v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13160v1" target="_blank" rel="noopener noreferrer">
                DP-TTA：基于字典驱动先验正则化的瞬态电磁信号去噪测试时自适应方法
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            DP-TTA: Test-time Adaptation for Transient Electromagnetic Signal Denoising via Dictionary-driven Prior Regularization
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Meng Yang, Kecheng Chen, Wei Luo, Xianjie Chen, Yong Jia, Mingyue Wang, Fanqiang...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于瞬态电磁信号去噪这一特定信号处理任务，属于物理和工程领域。虽然涉及测试时自适应技术，但其应用场景（电磁信号处理）与推荐系统、搜索或广告领域没有任何直接关联，也不涉及LLM、Transformer架构或异构数据建模等核心技术。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:22:03">2025-10-15 05:22:03</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13160v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13160v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Transient Electromagnetic (TEM) method is widely used in various geophysical applications, providing valuable insights into subsurface properties. However, time-domain TEM signals are often submerged in various types of noise. While recent deep learning-based denoising models have shown strong performance, these models are mostly trained on simulated or single real-world scenario data, overlooking the significant differences in noise characteristics from different geographical regions. Intuitively, models trained in one environment often struggle to perform well in new settings due to differences in geological conditions, equipment, and external interference, leading to reduced denoising performance. To this end, we propose the Dictionary-driven Prior Regularization Test-time Adaptation (DP-TTA). Our key insight is that TEM signals possess intrinsic physical characteristics, such as exponential decay and smoothness, which remain consistent across different regions regardless of external conditions. These intrinsic characteristics serve as ideal prior knowledge for guiding the TTA strategy, which helps the pre-trained model dynamically adjust parameters by utilizing self-supervised losses, improving denoising performance in new scenarios. To implement this, we customized a network, named DTEMDNet. Specifically, we first use dictionary learning to encode these intrinsic characteristics as a dictionary-driven prior, which is integrated into the model during training. At the testing stage, this prior guides the model to adapt dynamically to new environments by minimizing self-supervised losses derived from the dictionary-driven consistency and the signal one-order variation. Extensive experimental results demonstrate that the proposed method achieves much better performance than existing TEM denoising methods and TTA methods.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13151v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13151v1" target="_blank" rel="noopener noreferrer">
                中央凹注视提升隐写术的有效载荷容量
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Foveation Improves Payload Capacity in Steganography
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Lifeng Qiu Lin, Henry Kam, Qi Sun, Kaan Akşit
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于隐写术（信息隐藏技术），属于安全/隐私领域，与推荐系统、搜索或广告的核心技术完全无关。隐写术主要用于秘密通信和数据保护，不涉及任何推荐算法、搜索技术、广告排名或LLM相关应用。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T05:00:59">2025-10-15 05:00:59</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13151v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13151v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">cs.GR</span><span class="category-tag">I.2.10; I.4</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Steganography finds its use in visual medium such as providing metadata and watermarking. With support of efficient latent representations and foveated rendering, we trained models that improve existing capacity limits from 100 to 500 bits, while achieving better accuracy of up to 1 failure bit out of 2000, at 200K test bits. Finally, we achieve a comparable visual quality of 31.47 dB PSNR and 0.13 LPIPS, showing the effectiveness of novel perceptual design in creating multi-modal latent representations in steganography.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13137v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13137v1" target="_blank" rel="noopener noreferrer">
                基于深度学习的实时手语到文本翻译：LSTM与3D CNN的对比研究
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Real-Time Sign Language to text Translation using Deep Learning: A Comparative study of LSTM and 3D CNN
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Madhumati Pol, Anvay Anturkar, Anushka Khot, Ayush Andure, Aniruddha Ghosh, Anvi...
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于手语翻译这一特定领域应用，属于计算机视觉和时序建模的交叉领域。虽然使用了深度学习技术，但与搜索、推荐、广告系统的核心进展或使能技术没有直接关联，也不涉及我关注的异构数据统一建模或Transformer架构改进。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T04:26:33">2025-10-15 04:26:33</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13137v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13137v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This study investigates the performance of 3D Convolutional Neural Networks (3D CNNs) and Long Short-Term Memory (LSTM) networks for real-time American Sign Language (ASL) recognition. Though 3D CNNs are good at spatiotemporal feature extraction from video sequences, LSTMs are optimized for modeling temporal dependencies in sequential data. We evaluate both architectures on a dataset containing 1,200 ASL signs across 50 classes, comparing their accuracy, computational efficiency, and latency under similar training conditions. Experimental results demonstrate that 3D CNNs achieve 92.4% recognition accuracy but require 3.2% more processing time per frame compared to LSTMs, which maintain 86.7% accuracy with significantly lower resource consumption. The hybrid 3D CNNLSTM model shows decent performance, which suggests that context-dependent architecture selection is crucial for practical implementation.This project provides professional benchmarks for developing assistive technologies, highlighting trade-offs between recognition precision and real-time operational requirements in edge computing environments.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13109v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13109v1" target="_blank" rel="noopener noreferrer">
                VPREG：基于变分原理网格生成方法的微分同胚图像配准最优控制公式
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            VPREG: An Optimal Control Formulation for Diffeomorphic Image Registration Based on the Variational Principle Grid Generation Method
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Zicong Zhou, Baihan Zhao, Andreas Mang, Guojun Liao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学图像配准领域的最优控制方法，属于计算机视觉中的特定应用方向。虽然涉及变分方法和网格生成技术，但这些技术与推荐系统、搜索或广告的核心技术栈没有直接关联。论文内容纯粹针对医学图像处理，属于明确的无关领域。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T03:02:39">2025-10-15 03:02:39</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13109v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13109v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span><span class="category-tag">math.OC</span><span class="category-tag">49J20</span><span class="category-tag">49K20</span><span class="category-tag">49N45</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <!-- The source abstract contained a raw LaTeX \emph{...} command; it is rendered as <em> here so readers do not see the markup. -->
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                This paper introduces VPreg, a novel diffeomorphic image registration method. This work provides several improvements to our past work on mesh generation and diffeomorphic image registration. VPreg aims to achieve excellent registration accuracy while controlling the quality of the registration transformations. It ensures a positive Jacobian determinant of the spatial transformation and provides an accurate approximation of the inverse of the registration, a crucial property for many neuroimaging workflows. Unlike conventional methods, VPreg generates this inverse transformation within the group of diffeomorphisms rather than operating on the image space. The core of VPreg is a grid generation approach, referred to as <em>Variational Principle</em> (VP), which constructs non-folding grids with prescribed Jacobian determinant and curl. These VP-generated grids guarantee diffeomorphic spatial transformations essential for computational anatomy and morphometry, and provide a more accurate inverse than existing methods. To assess the potential of the proposed approach, we conduct a performance analysis for 150 registrations of brain scans from the OASIS-1 dataset. Performance evaluation based on Dice scores for 35 regions of interest, along with an empirical analysis of the properties of the computed spatial transformations, demonstrates that VPreg outperforms state-of-the-art methods in terms of Dice scores, regularity properties of the computed transformation, and accuracy and consistency of the provided inverse map. We compare our results to ANTs-SyN, Freesurfer-Easyreg, and FSL-Fnirt.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<!-- Paper card for arXiv:2510.13084v1 (score badge 1/10). The collapsed-level-2 class hides the card through the page's inline "display: none !important" rule. Decorative icons and separators are aria-hidden; English text is tagged lang="en" because the document language is zh-CN. -->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13084v1" target="_blank" rel="noopener noreferrer">
                编辑您的兴趣：通过特征最相似传播实现高效视频编辑
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1" aria-hidden="true"></i>1/10
        </span>
    </div>

    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700" lang="en">
            Edit-Your-Interest: Efficient Video Editing via Feature Most-Similar Propagation
        </div>

        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1" aria-hidden="true"></i>Yi Zuo, Zitao Wang, Lingling Li, Xu Liu, Fang Liu, Licheng Jiao
        </div>

        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1" aria-hidden="true"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于视频编辑技术，涉及特征传播方法，与推荐系统、搜索或广告的核心领域进展、LLM技术或Transformer架构没有直接关联。视频编辑属于计算机视觉应用领域，不在当前关注的技术范畴内。</p>
        </div>

        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
            <i class="fa fa-calendar-o mr-1" aria-hidden="true"></i>
            <time datetime="2025-10-15T01:55:32">2025-10-15 01:55:32</time>
            <span class="mx-2" aria-hidden="true">|</span>
            <a href="https://arxiv.org/abs/2510.13084v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                arXiv:2510.13084v1
            </a>
            <span class="mx-2" aria-hidden="true">|</span>
            <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
        </div>

        <!-- Native disclosure widget: the abstract stays collapsed until the summary is activated. -->
        <details class="border-t border-gray-200 pt-4 mt-4">
            <summary class="text-sm text-primary cursor-pointer">
                查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs" aria-hidden="true"></i>
            </summary>
            <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700" lang="en">
                Text-to-image (T2I) diffusion models have recently demonstrated significant progress in video editing. However, existing video editing methods are severely limited by their high computational overhead and memory consumption. Furthermore, these approaches often sacrifice visual fidelity, leading to undesirable temporal inconsistencies and artifacts such as blurring and pronounced mosaic-like patterns. We propose Edit-Your-Interest, a lightweight, text-driven, zero-shot video editing method. Edit-Your-Interest introduces a spatio-temporal feature memory to cache features from previous frames, significantly reducing computational overhead compared to full-sequence spatio-temporal modeling approaches. Specifically, we first introduce a Spatio-Temporal Feature Memory bank (SFM), which is designed to efficiently cache and retain the crucial image tokens processed by spatial attention. Second, we propose the Feature Most-Similar Propagation (FMP) method. FMP propagates the most relevant tokens from previous frames to subsequent ones, preserving temporal consistency. Finally, we introduce an SFM update algorithm that continuously refreshes the cached features, ensuring their long-term relevance and effectiveness throughout the video sequence. Furthermore, we leverage cross-attention maps to automatically extract masks for the instances of interest. These masks are seamlessly integrated into the diffusion denoising process, enabling fine-grained control over target objects and allowing Edit-Your-Interest to perform highly accurate edits while robustly preserving the background integrity. Extensive experiments decisively demonstrate that the proposed Edit-Your-Interest outperforms state-of-the-art methods in both efficiency and visual fidelity, validating its superior effectiveness and practicality.
            </div>
        </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13080v1" target="_blank" rel="noopener noreferrer">
                扩散模型中的幻觉计数
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Counting Hallucinations in Diffusion Models
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Shuai Fu, Jian Zhou, Qi Chen, Huang Jing, Huy Anh Nguyen, Xiaohan Liu, Zhixiong ...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于扩散模型的幻觉评估，这属于纯粹的生成模型质量评估范畴，与推荐系统、搜索或广告的核心技术进展无关。论文标题明确指向幻觉计数这一特定评估任务，没有显示出在推荐、搜索或广告领域的潜在应用价值。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-15 01:48:04
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.13080v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.13080v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Diffusion probabilistic models (DPMs) have demonstrated remarkable progress in generative tasks, such as image and video synthesis. However, they still often produce hallucinated samples (hallucinations) that conflict with real-world knowledge, such as generating an implausible duplicate cup floating beside another cup. Despite their prevalence, the lack of feasible methodologies for systematically quantifying such hallucinations hinders progress in addressing this challenge and obscures potential pathways for designing next-generation generative models under factual constraints. In this work, we bridge this gap by focusing on a specific form of hallucination, which we term counting hallucination, referring to the generation of an incorrect number of instances or structured objects, such as a hand image with six fingers, despite such patterns being absent from the training data. To this end, we construct a dataset suite CountHalluSet, with well-defined counting criteria, comprising ToyShape, SimObject, and RealHand. Using these datasets, we develop a standardized evaluation protocol for quantifying counting hallucinations, and systematically examine how different sampling conditions in DPMs, including solver type, ODE solver order, sampling steps, and initial noise, affect counting hallucination levels. Furthermore, we analyze their correlation with common evaluation metrics such as FID, revealing that this widely used image quality metric fails to capture counting hallucinations consistently. This work aims to take the first step toward systematically quantifying hallucinations in diffusion models and offer new insights into the investigation of hallucination phenomena in image generation.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13075v1" target="_blank" rel="noopener noreferrer">
                通过内容对齐实现海马体分割的无监督域自适应
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Unsupervised Domain Adaptation via Content Alignment for Hippocampus Segmentation
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Hoda Kalabizadeh, Ludovica Griffanti, Pak-Hei Yeung, Ana I. L. Namburete, Nicola...
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于医学影像中的海马体分割和域自适应，属于医学/生物领域的特定应用。虽然提到了域自适应技术，但该技术应用于医学影像分割，与推荐系统、搜索或广告没有直接关联，也不涉及LLM、Transformer架构或异构数据处理。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-15 01:34:41
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.13075v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.13075v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Deep learning models for medical image segmentation often struggle when deployed across different datasets due to domain shifts - variations in both image appearance, known as style, and population-dependent anatomical characteristics, referred to as content. This paper presents a novel unsupervised domain adaptation framework that directly addresses domain shifts encountered in cross-domain hippocampus segmentation from MRI, with specific emphasis on content variations. Our approach combines efficient style harmonisation through z-normalisation with a bidirectional deformable image registration (DIR) strategy. The DIR network is jointly trained with segmentation and discriminator networks to guide the registration with respect to a region of interest and generate anatomically plausible transformations that align source images to the target domain. We validate our approach through comprehensive evaluations on both a synthetic dataset using Morpho-MNIST (for controlled validation of core principles) and three MRI hippocampus datasets representing populations with varying degrees of atrophy. Across all experiments, our method outperforms existing baselines. For hippocampus segmentation, when transferring from young, healthy populations to clinical dementia patients, our framework achieves up to 15% relative improvement in Dice score compared to standard augmentation methods, with the largest gains observed in scenarios with substantial content shift. These results highlight the efficacy of our approach for accurate hippocampus segmentation across diverse populations.
                </div>
            </details>
    </div>
</div><!--
 * @Author: Doragd doragd@users.noreply.github.com
 * @Date: 2025-10-09 23:23:38
 * @LastEditors: Doragd doragd@users.noreply.github.com
 * @LastEditTime: 2025-10-10 00:41:41
 * @FilePath: /Algorithm-Practice-in-Industry/paperBotV2/frontend/templates/normal_paper_template.html
 * @Description: 这是默认设置,请设置`customMade`, 打开koroFileHeader查看配置 进行设置: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
-->
<div class="simple-paper-card p-3 collapsed-level-2">
    <div class="flex justify-between items-start mb-1">
        <h3 class="text-base font-medium text-primary hover:underline transition-colors">
            <a href="https://www.alphaxiv.org/abs/2510.13067v1" target="_blank" rel="noopener noreferrer">
                面向红外与可见光图像融合的方向感知多尺度梯度损失
            </a>
        </h3>
        <span class="score-badge bg-gray-100 text-gray-800">
            <i class="fa fa-star mr-1"></i>1/10
        </span>
    </div>
    
    <div class="paper-details">
        <div class="mb-2 text-base text-gray-700">
            Direction-aware multi-scale gradient loss for infrared and visible image fusion
        </div>
        
        <div class="mb-2 text-sm text-gray-600 italic">
            <i class="fa fa-user-circle-o text-gray-500 mr-1"></i>Kaixuan Yang, Wei Xiang, Zhenshuai Chen, Tong Jin, Yunpeng Liu
        </div>
        
        
        
        
        <div class="mb-2">
            <strong class="text-gray-700 text-sm"><i class="fa fa-thumbs-up text-green-500 mr-1"></i>个性化推荐理由:</strong>
            <p class="text-gray-600 text-sm mt-1">该论文专注于红外与可见光图像融合的计算机视觉任务，属于纯粹的视觉处理领域。论文内容涉及图像融合的损失函数设计，与推荐系统、搜索或广告的核心技术领域没有直接关联，也不涉及LLM、Transformer架构或异构数据建模等当前关注的技术方向。</p>
        </div>
        
        <div class="flex flex-wrap items-center text-xs text-gray-500 pt-2 border-t border-gray-100">
                <i class="fa fa-calendar-o mr-1"></i> 2025-10-15 01:26:39
                <span class="mx-2">|</span>
                <a href="https://arxiv.org/abs/2510.13067v1" target="_blank" rel="noopener noreferrer" class="text-primary hover:underline">
                    arXiv:2510.13067v1
                </a>
                <span class="mx-2">|</span>
                <div class="flex flex-wrap"><span class="category-tag">cs.CV</span></div>
            </div>
            
            
            <details class="border-t border-gray-200 pt-4 mt-4">
                 <summary class="text-sm text-primary cursor-pointer"> 
                     查看完整摘要 <i class="fa fa-chevron-down ml-1 text-xs"></i> 
                 </summary> 
                 <div class="abstract-content mt-2 p-3 bg-gray-50 rounded-md text-sm text-gray-700">
                    Infrared and visible image fusion aims to integrate complementary information from co-registered source images to produce a single, informative result. Most learning-based approaches train with a combination of structural similarity loss, intensity reconstruction loss, and a gradient-magnitude term. However, collapsing gradients to their magnitude removes directional information, yielding ambiguous supervision and suboptimal edge fidelity. We introduce a direction-aware, multi-scale gradient loss that supervises horizontal and vertical components separately and preserves their sign across scales. This axis-wise, sign-preserving objective provides clear directional guidance at both fine and coarse resolutions, promoting sharper, better-aligned edges and richer texture preservation without changing model architectures or training protocols. Experiments on open-source model and multiple public benchmarks demonstrate effectiveness of our approach.
                </div>
            </details>
    </div>
</div>
        </div>
    </main>

    <!-- 加载论文数据和JavaScript逻辑 -->
    <script src="static/app.js"></script>

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            // Insert the "expand/collapse all" toggle before the non-featured papers and a
            // divider after them.
            const papersContainer = document.querySelector('#papers-container');
            if (papersContainer) {
                // Toggle that expands or collapses every non-featured paper at once.
                const expandAllButton = document.createElement('div');
                expandAllButton.className = 'expand-toggle';
                expandAllButton.textContent = '展开/折叠全部非精选论文';
                expandAllButton.addEventListener('click', function() {
                    // classList.toggle returns true when the class is present afterwards.
                    const isExpanded = papersContainer.classList.toggle('expanded-all');
                    this.textContent = isExpanded ? '收起全部非精选论文' : '展开全部非精选论文';

                    // Bring every level-1 card in line with the new global state.
                    papersContainer.querySelectorAll('.collapsed-level-1').forEach(paper => {
                        // Per-paper toggling writes an inline `display`, which beats the
                        // class-based `.collapsed-level-1 .paper-details` rule. Clear it so the
                        // card's visibility follows the container class again and therefore
                        // matches the icon set below.
                        const details = paper.querySelector('.paper-details');
                        if (details) {
                            details.style.display = '';
                        }

                        // Same class strings as the per-paper handlers (keeps `cursor-pointer`).
                        const iconElement = paper.querySelector('.expand-icon');
                        if (iconElement) {
                            iconElement.className = isExpanded ?
                                'expand-icon fa fa-eye cursor-pointer' : 'expand-icon fa fa-eye-slash cursor-pointer';
                        }
                    });
                });

                // Put the toggle right before the first non-featured card. Inserting through the
                // card's own parent avoids a NotFoundError when the card is nested deeper than a
                // direct child of #papers-container.
                const firstNormalPaper = papersContainer.querySelector('.simple-paper-card');
                if (firstNormalPaper) {
                    firstNormalPaper.parentNode.insertBefore(expandAllButton, firstNormalPaper);
                }

                // Divider whose label toggles the `expanded-level-2` class on the container
                // (used to reveal the lowest-scored papers).
                const divider = document.createElement('div');
                divider.className = 'papers-divider';

                const dividerLabel = document.createElement('div');
                dividerLabel.className = 'papers-divider-label';
                dividerLabel.textContent = '点击展开更多论文（评分较低）';
                dividerLabel.addEventListener('click', function() {
                    const lowScoreShown = papersContainer.classList.toggle('expanded-level-2');
                    this.textContent = lowScoreShown ?
                        '点击收起低分论文' : '点击展开更多论文（评分较低）';
                });

                divider.appendChild(dividerLabel);

                // Append the divider right after the last non-featured card (again via the
                // card's own parent, for the same reason as above).
                const normalPapers = papersContainer.querySelectorAll('.simple-paper-card');
                if (normalPapers.length > 0) {
                    const lastNormalPaper = normalPapers[normalPapers.length - 1];
                    lastNormalPaper.parentNode.insertBefore(divider, lastNormalPaper.nextSibling);
                }
            }
            
            // Let every collapsed (non-featured) paper expand/collapse its details by clicking
            // either the title or an eye icon placed in front of the title link.
            const collapsedPapers = document.querySelectorAll('.collapsed-level-1');
            collapsedPapers.forEach(paper => {
                const titleElement = paper.querySelector('h3');
                if (!titleElement) {
                    return;
                }
                titleElement.style.cursor = 'pointer';

                // Single toggle routine shared by the icon and the title handlers.
                const toggleDetails = () => {
                    const details = paper.querySelector('.paper-details');
                    if (!details) {
                        return;
                    }

                    // Decide from the computed style, not the inline style: the details can
                    // also be shown by a stylesheet rule (e.g. while `expanded-all` is set on
                    // the container), in which case the inline style is empty and an
                    // inline-only check would need two clicks to collapse the card.
                    const isExpanded = window.getComputedStyle(details).display !== 'none';
                    details.style.display = isExpanded ? 'none' : 'block';

                    // Keep the icon in step with the new state.
                    const icon = titleElement.querySelector('.expand-icon');
                    if (icon) {
                        icon.className = isExpanded ?
                            'expand-icon fa fa-eye-slash cursor-pointer' : 'expand-icon fa fa-eye cursor-pointer';
                        icon.style.marginRight = '8px';
                    }
                };

                // The icon is only added when the title contains a link; it is inserted as a
                // sibling placed before that link.
                const linkElement = titleElement.querySelector('a');
                if (linkElement) {
                    const iconElement = document.createElement('i');
                    iconElement.className = 'expand-icon fa fa-eye-slash cursor-pointer';
                    iconElement.style.marginRight = '8px';
                    titleElement.insertBefore(iconElement, linkElement);

                    iconElement.addEventListener('click', function(e) {
                        // Stop the click from also reaching the title handler below.
                        e.stopPropagation();
                        toggleDetails();
                    });
                }

                // Clicking the title itself toggles too, but clicks on the link (navigation)
                // or on the icon (handled above) are ignored here.
                titleElement.addEventListener('click', function(e) {
                    if (e.target.closest('a') || e.target.closest('.expand-icon')) {
                        return;
                    }
                    toggleDetails();
                });
            });
            
            // "Featured only" button: show the featured cards and hide the non-featured ones.
            const showSelectedButton = document.getElementById('show-selected');
            if (showSelectedButton) {
                showSelectedButton.addEventListener('click', function() {
                    const selectedPapers = document.querySelectorAll('.paper-card');
                    const normalPapers = document.querySelectorAll('.simple-paper-card');

                    selectedPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });

                    normalPapers.forEach(paper => {
                        paper.style.display = 'none';
                    });

                    // Refresh the "showing X of Y" counter.
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${selectedPapers.length} 篇论文 (共 ${selectedPapers.length + normalPapers.length} 篇)`;
                    }

                    // Mark this button as active and the sibling "all papers" button as inactive.
                    // The sibling is looked up defensively: if it is missing, the rest of this
                    // handler (hiding the toggle and divider) must still run.
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    const allPapersButton = document.getElementById('show-all');
                    if (allPapersButton) {
                        allPapersButton.className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    }

                    // The expand/collapse toggle and the divider only make sense while
                    // non-featured papers are listed, so hide them.
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) expandToggle.style.display = 'none';

                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'none';
                });
            }
            
            // "All papers" button: show every card again and reset the collapse state.
            const showAllButton = document.getElementById('show-all');
            if (showAllButton) {
                showAllButton.addEventListener('click', function() {
                    const allPapers = document.querySelectorAll('.paper-card, .simple-paper-card');
                    allPapers.forEach(paper => {
                        paper.style.display = 'block';
                    });

                    // Reset the global expand state. The container is optional elsewhere in
                    // this script, so guard it here as well instead of throwing mid-handler.
                    if (papersContainer) {
                        papersContainer.classList.remove('expanded-all');
                    }

                    // With `expanded-all` gone, re-derive each icon from what is actually
                    // visible so icons do not keep claiming "expanded" for hidden details.
                    document.querySelectorAll('.collapsed-level-1').forEach(paper => {
                        const details = paper.querySelector('.paper-details');
                        const iconElement = paper.querySelector('.expand-icon');
                        if (details && iconElement) {
                            const isVisible = window.getComputedStyle(details).display !== 'none';
                            iconElement.className = isVisible ?
                                'expand-icon fa fa-eye cursor-pointer' : 'expand-icon fa fa-eye-slash cursor-pointer';
                        }
                    });

                    // Refresh the "showing X of Y" counter.
                    const displayCountElement = document.getElementById('display-count');
                    if (displayCountElement) {
                        displayCountElement.textContent = `显示 ${allPapers.length} 篇论文 (共 ${allPapers.length} 篇)`;
                    }

                    // Mark this button as active and the sibling "featured only" button as
                    // inactive (sibling looked up defensively).
                    this.className = 'px-3 py-1 bg-primary text-white rounded text-sm hover:bg-primary/90 transition-colors';
                    const featuredOnlyButton = document.getElementById('show-selected');
                    if (featuredOnlyButton) {
                        featuredOnlyButton.className = 'px-3 py-1 bg-gray-200 text-gray-700 rounded text-sm hover:bg-gray-300 transition-colors';
                    }

                    // Bring back the expand/collapse toggle and the divider.
                    const expandToggle = document.querySelector('.expand-toggle');
                    if (expandToggle) {
                        expandToggle.style.display = 'block';
                        expandToggle.textContent = '展开全部非精选论文';
                    }

                    const papersDivider = document.querySelector('.papers-divider');
                    if (papersDivider) papersDivider.style.display = 'block';
                });
            }
        });
    </script>
    <script>
    
    // Start the calendar once the DOM has been parsed. Any failure inside
    // initCalendar is logged instead of breaking the rest of the page.
    document.addEventListener('DOMContentLoaded', function onDomReady() {
        try {
            console.log('Attempting to initialize calendar...');
            initCalendar();
        } catch (err) {
            console.error('Error initializing calendar:', err);
        }
    });
    
    /**
     * Set up the date-picker calendar.
     *
     * Reads the page date from #current-date, restores the last selection from
     * localStorage (when available), wires the toggle / outside-click / month
     * navigation handlers, and renders a month grid in which only the dates
     * listed in `availableDates` are clickable; clicking one navigates to
     * `arxiv_<YYYYMMDD>.html`.
     *
     * Throws a TypeError when one of the required elements is missing.
     */
    function initCalendar() {
        const toggleBtn = document.getElementById('date-picker-toggle');
        const datePicker = document.getElementById('date-picker');
        const calendarGrid = document.getElementById('calendar-grid');
        const prevMonthBtn = document.getElementById('prev-month');
        const nextMonthBtn = document.getElementById('next-month');
        const currentMonthEl = document.getElementById('current-month');
        const selectedDateText = document.getElementById('selected-date-text');

        // Zero-pad a day or month number to two digits.
        const pad2 = (value) => String(value).padStart(2, '0');

        // Best-effort localStorage access: storage can throw (disabled, blocked or
        // full). Persistence is optional, so a failure must not break the calendar
        // or block navigation.
        const readStored = (key) => {
            try {
                return localStorage.getItem(key);
            } catch (err) {
                console.warn('localStorage read failed:', err);
                return null;
            }
        };
        const writeStored = (key, value) => {
            try {
                localStorage.setItem(key, value);
            } catch (err) {
                console.warn('localStorage write failed:', err);
            }
        };

        // Date shown on the current page. Pull year/month/day out as numbers so that
        // "2025年10月15日", "2025/10/15" and "2025-10-15" all work and the date is
        // built in local time. (Stripping the CJK markers with a regex would also
        // remove the year, and ISO date-only strings are parsed as UTC.)
        const rawDateText = document.getElementById('current-date').textContent.trim();
        const dateParts = rawDateText.match(/(\d{4})\D+(\d{1,2})\D+(\d{1,2})/);
        let currentDate;
        let currentPageDate;
        if (dateParts) {
            const year = Number(dateParts[1]);
            const month = Number(dateParts[2]);
            const day = Number(dateParts[3]);
            currentDate = new Date(year, month - 1, day);
            // Same YYYY-MM-DD shape that renderCalendar compares against.
            currentPageDate = year + '-' + pad2(month) + '-' + pad2(day);
        } else {
            // Unrecognised layout: fall back to the previous free-form parsing.
            const legacyDateStr = rawDateText.replace(/^\d+年|月|日/g, '');
            currentDate = new Date(legacyDateStr);
            currentPageDate = legacyDateStr.replace(/\//g, '-');
        }
        if (Number.isNaN(currentDate.getTime())) {
            // Unparseable page date: start from today so the header never shows NaN.
            currentDate = new Date();
        }
        let displayYear = currentDate.getFullYear();
        let displayMonth = currentDate.getMonth();

        // Dates (YYYYMMDD) that have a generated page.
        const availableDates = ["20251014","20251022","20251023","20251015","20251024","20251009","20251010","20251016","20251021","20251017"];
        const availableDateSet = new Set(availableDates);

        // Restore the previous selection, if any. A stored selection takes
        // precedence over the page date; stored year/month are used only when they
        // parse to sane values.
        const savedDate = readStored('selectedDate');
        const savedYear = parseInt(readStored('selectedYear'), 10);
        const savedMonth = parseInt(readStored('selectedMonth'), 10);

        if (savedDate) {
            selectedDateText.textContent = savedDate;
            if (!Number.isNaN(savedYear)) displayYear = savedYear;
            if (savedMonth >= 0 && savedMonth <= 11) displayMonth = savedMonth;
        } else {
            // First visit: remember the page's own date as the selection.
            selectedDateText.textContent = currentPageDate;
            writeStored('selectedDate', currentPageDate);
            writeStored('selectedYear', currentDate.getFullYear().toString());
            writeStored('selectedMonth', currentDate.getMonth().toString());
        }

        // Toggle the picker; render the grid each time it is opened.
        toggleBtn.addEventListener('click', (e) => {
            e.stopPropagation();

            if (datePicker.classList.contains('hidden')) {
                datePicker.classList.remove('hidden');
                renderCalendar();
            } else {
                datePicker.classList.add('hidden');
            }
        });

        // Clicking anywhere else closes the picker.
        document.addEventListener('click', () => {
            if (!datePicker.classList.contains('hidden')) {
                datePicker.classList.add('hidden');
            }
        });

        // Clicks inside the picker must not reach the document-level close handler.
        datePicker.addEventListener('click', (e) => {
            e.stopPropagation();
        });

        // Previous / next month navigation with year roll-over.
        prevMonthBtn.addEventListener('click', () => {
            displayMonth--;
            if (displayMonth < 0) {
                displayMonth = 11;
                displayYear--;
            }
            renderCalendar();
        });

        nextMonthBtn.addEventListener('click', () => {
            displayMonth++;
            if (displayMonth > 11) {
                displayMonth = 0;
                displayYear++;
            }
            renderCalendar();
        });

        /**
         * Rebuild the day grid for displayYear / displayMonth.
         */
        function renderCalendar() {
            calendarGrid.innerHTML = '';

            // Month header.
            const monthNames = ['1月', '2月', '3月', '4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月', '12月'];
            currentMonthEl.textContent = displayYear + '年' + monthNames[displayMonth];

            // Weekday of the 1st (0 = Sunday) and number of days in the month
            // (day 0 of the next month is the last day of this one).
            const firstDayOfWeek = new Date(displayYear, displayMonth, 1).getDay();
            const daysInMonth = new Date(displayYear, displayMonth + 1, 0).getDate();

            // Empty leading cells so the 1st lands in the right weekday column.
            for (let i = 0; i < firstDayOfWeek; i++) {
                const emptyDay = document.createElement('div');
                emptyDay.classList.add('py-1', 'text-gray-300');
                calendarGrid.appendChild(emptyDay);
            }

            // Midnight today, for the "today" highlight.
            const today = new Date();
            today.setHours(0, 0, 0, 0);

            for (let day = 1; day <= daysInMonth; day++) {
                const dayElement = document.createElement('div');
                const currentDateObj = new Date(displayYear, displayMonth, day);
                const isToday = currentDateObj.getTime() === today.getTime();
                // dateStr is the page key (YYYYMMDD); displayDateStr is the label (YYYY-MM-DD).
                const dateStr = `${displayYear}${pad2(displayMonth + 1)}${pad2(day)}`;
                const displayDateStr = `${displayYear}-${pad2(displayMonth + 1)}-${pad2(day)}`;

                dayElement.textContent = day;

                const hasPapers = availableDateSet.has(dateStr);

                if (hasPapers) {
                    dayElement.classList.add('py-1', 'cursor-pointer', 'hover:bg-gray-100', 'rounded', 'bg-blue-50', 'font-medium');

                    // Remember the selection, then navigate to that date's page.
                    dayElement.addEventListener('click', () => {
                        console.log('Date clicked:', displayDateStr);
                        selectedDateText.textContent = displayDateStr;

                        writeStored('selectedDate', displayDateStr);
                        writeStored('selectedYear', displayYear.toString());
                        writeStored('selectedMonth', displayMonth.toString());

                        datePicker.classList.add('hidden');

                        const targetUrl = 'arxiv_' + dateStr + '.html';
                        window.location.href = targetUrl;
                    });
                } else {
                    // No page for this date: greyed out, no click handler.
                    dayElement.classList.add('py-1', 'text-gray-400', 'cursor-not-allowed');
                }

                // Today's highlight replaces the light "has papers" background.
                if (isToday) {
                    dayElement.classList.remove('bg-blue-50');
                    dayElement.classList.add('bg-primary', 'text-white', 'font-bold', 'shadow');
                    if (!hasPapers) {
                        // Today without papers keeps the highlight but is dimmed.
                        dayElement.classList.add('opacity-70');
                    }
                }

                // Outline the currently selected date.
                if (displayDateStr === selectedDateText.textContent) {
                    dayElement.classList.add('font-bold', 'border-2', 'border-primary', 'rounded-lg', 'shadow-md');
                }

                // Stronger tint for dates with papers (other than today).
                if (hasPapers && !isToday) {
                    dayElement.classList.add('bg-blue-100', 'hover:bg-blue-200', 'transition-colors', 'duration-200');
                }

                calendarGrid.appendChild(dayElement);
            }
        }
    }
    </script>
    </body>

</html>